Enhance the speed of the "Find Targets" and "View Targets" modules and fix various bugs.
If the speed of the "Find Targets" and "View Targets" modules is optimized, users will experience significantly faster searches for genes, guides, and DNA sequences.
If the bugs are resolved, users will be able to operate the program without experiencing crashes.
The program builds an index_data dictionary and serializes index_data to a file with the .index extension. The absolute values of the start positions in the CSPR file are naturally sorted in ascending order, which allows a binary search to quickly retrieve the guide sequence data from the indexed CSPR file. If the index file already exists, the program loads it into the index_data variable.
The _index_ variable is organized into two main sections:
In subsequent runs, if the index file already exists, the program deserializes the index file and loads it into the _index_ variable.
Special thanks to David for stress-testing the program last week. The following issues have been resolved:
My next step is to address any bugs that David may encounter during testing. I’ll focus on stabilizing the modules with outstanding issues and then proceed with the implementation of the Microbiome Analysis module.
|
@@ -2,12 +2,13 @@ from models.FindTargetsModel import FindTargetsModel
|
|
| 2 |
from views.FindTargetsView import FindTargetsView
|
| 3 |
from PyQt6.QtWidgets import QMessageBox
|
| 4 |
from PyQt6.QtCore import QTimer
|
|
|
|
| 5 |
|
| 6 |
class FindTargetsController:
|
| 7 |
def __init__(self, global_settings):
|
| 8 |
self.global_settings = global_settings
|
| 9 |
-
self.model =
|
| 10 |
-
self.view =
|
| 11 |
self.organism = None
|
| 12 |
self.endonuclease = None
|
| 13 |
self._input_data = None
|
|
@@ -15,17 +16,23 @@ class FindTargetsController:
|
|
| 15 |
|
| 16 |
# Connect to annotation file changes
|
| 17 |
self.global_settings.annotation_file_changed.connect(self._on_annotation_file_changed)
|
|
|
|
| 18 |
|
| 19 |
def _on_annotation_file_changed(self, new_annotation_file):
|
| 20 |
-
"""Handle annotation file changes by
|
| 21 |
try:
|
| 22 |
self.global_settings.logger.debug(f"FindTargetsController received new annotation file: {new_annotation_file}")
|
| 23 |
self._current_annotation_file = new_annotation_file
|
| 24 |
|
| 25 |
-
# Clear
|
| 26 |
-
self.view
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
except Exception as e:
|
| 30 |
self.global_settings.logger.error(f"Error handling annotation file change: {str(e)}")
|
| 31 |
|
|
@@ -35,31 +42,27 @@ class FindTargetsController:
|
|
| 35 |
self.view.push_button_view_targets.clicked.connect(self.view_targets)
|
| 36 |
|
| 37 |
def find_targets(self, input_data):
|
| 38 |
-
"""
|
| 39 |
try:
|
|
|
|
|
|
|
| 40 |
# Get current annotation file
|
| 41 |
current_annotation = self.global_settings.get_current_annotation_file()
|
| 42 |
input_data['annotation_file'] = current_annotation
|
| 43 |
self._current_annotation_file = current_annotation
|
|
|
|
| 44 |
|
| 45 |
-
#
|
| 46 |
-
self.
|
| 47 |
-
self.view = FindTargetsView(self.global_settings)
|
| 48 |
-
self._connect_signals()
|
| 49 |
-
|
| 50 |
-
self._input_data = input_data
|
| 51 |
|
| 52 |
-
#
|
| 53 |
main_window = self.global_settings.main_window
|
| 54 |
existing_tab = main_window.find_tab_by_title("Find Targets")
|
|
|
|
|
|
|
| 55 |
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
tab_index = main_window.view.tab_widget.indexOf(existing_tab)
|
| 59 |
-
main_window.view.tab_widget.removeTab(tab_index)
|
| 60 |
-
|
| 61 |
-
# Process data and create new tab
|
| 62 |
-
self._process_input_data(input_data)
|
| 63 |
|
| 64 |
except Exception as e:
|
| 65 |
self.global_settings.logger.error(f"Error in find_targets: {str(e)}")
|
|
@@ -68,24 +71,28 @@ class FindTargetsController:
|
|
| 68 |
def _process_input_data(self, input_data):
|
| 69 |
"""Process input data and update view"""
|
| 70 |
try:
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
self.global_settings.logger.debug(f"FindTargetsController processing input data: {input_data}")
|
| 75 |
self.organism = input_data['organism']
|
| 76 |
self.endonuclease = input_data['endonuclease']
|
| 77 |
|
| 78 |
# Get new results
|
|
|
|
| 79 |
results = self.model.find_targets(input_data)
|
|
|
|
|
|
|
| 80 |
self.global_settings.logger.debug(f"Found {len(results) if results else 0} targets")
|
| 81 |
|
| 82 |
# Update view with new results
|
|
|
|
| 83 |
if results:
|
| 84 |
self.view.display_results(results)
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
main_window.open_new_tab("Find Targets", self)
|
| 89 |
|
| 90 |
except Exception as e:
|
| 91 |
self.global_settings.logger.error(f"Error processing input data: {str(e)}")
|
|
@@ -93,12 +100,14 @@ class FindTargetsController:
|
|
| 93 |
QMessageBox.critical(self.view, "Error", f"An error occurred while processing data: {str(e)}")
|
| 94 |
|
| 95 |
def view_targets(self):
|
| 96 |
-
"""Handle view targets button click"""
|
| 97 |
try:
|
| 98 |
if not self.view:
|
| 99 |
return
|
| 100 |
|
| 101 |
selected_targets = self.view.get_selected_targets()
|
|
|
|
|
|
|
|
|
|
| 102 |
if not selected_targets:
|
| 103 |
QMessageBox.warning(self.view, "No Selection", "Please select targets to view.")
|
| 104 |
return
|
|
|
|
| 2 |
from views.FindTargetsView import FindTargetsView
|
| 3 |
from PyQt6.QtWidgets import QMessageBox
|
| 4 |
from PyQt6.QtCore import QTimer
|
| 5 |
+
import time
|
| 6 |
|
| 7 |
class FindTargetsController:
|
| 8 |
def __init__(self, global_settings):
|
| 9 |
self.global_settings = global_settings
|
| 10 |
+
self.model = FindTargetsModel(self.global_settings)
|
| 11 |
+
self.view = FindTargetsView(self.global_settings)
|
| 12 |
self.organism = None
|
| 13 |
self.endonuclease = None
|
| 14 |
self._input_data = None
|
|
|
|
| 16 |
|
| 17 |
# Connect to annotation file changes
|
| 18 |
self.global_settings.annotation_file_changed.connect(self._on_annotation_file_changed)
|
| 19 |
+
self._connect_signals()
|
| 20 |
|
| 21 |
def _on_annotation_file_changed(self, new_annotation_file):
|
| 22 |
+
"""Handle annotation file changes by clearing and updating results"""
|
| 23 |
try:
|
| 24 |
self.global_settings.logger.debug(f"FindTargetsController received new annotation file: {new_annotation_file}")
|
| 25 |
self._current_annotation_file = new_annotation_file
|
| 26 |
|
| 27 |
+
# Clear the current results
|
| 28 |
+
if self.view and hasattr(self.view, 'results_table'):
|
| 29 |
+
self.view.clear_results()
|
| 30 |
+
|
| 31 |
+
# If we have previous input data, rerun the search with the new annotation file
|
| 32 |
+
if self._input_data:
|
| 33 |
+
self._input_data['annotation_file'] = new_annotation_file
|
| 34 |
+
self._process_input_data(self._input_data)
|
| 35 |
+
|
| 36 |
except Exception as e:
|
| 37 |
self.global_settings.logger.error(f"Error handling annotation file change: {str(e)}")
|
| 38 |
|
|
|
|
| 42 |
self.view.push_button_view_targets.clicked.connect(self.view_targets)
|
| 43 |
|
| 44 |
def find_targets(self, input_data):
|
| 45 |
+
"""Process input data and update existing view or create new one"""
|
| 46 |
try:
|
| 47 |
+
start_time = time.time()
|
| 48 |
+
|
| 49 |
# Get current annotation file
|
| 50 |
current_annotation = self.global_settings.get_current_annotation_file()
|
| 51 |
input_data['annotation_file'] = current_annotation
|
| 52 |
self._current_annotation_file = current_annotation
|
| 53 |
+
self._input_data = input_data.copy() # Store a copy of the input data
|
| 54 |
|
| 55 |
+
# Process data and update view
|
| 56 |
+
self._process_input_data(input_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
+
# If there's no existing tab, create one
|
| 59 |
main_window = self.global_settings.main_window
|
| 60 |
existing_tab = main_window.find_tab_by_title("Find Targets")
|
| 61 |
+
if not existing_tab:
|
| 62 |
+
main_window.open_new_tab("Find Targets", self)
|
| 63 |
|
| 64 |
+
total_time = time.time() - start_time
|
| 65 |
+
self.global_settings.logger.debug(f"Total time to process find targets: {total_time:.2f} seconds")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
except Exception as e:
|
| 68 |
self.global_settings.logger.error(f"Error in find_targets: {str(e)}")
|
|
|
|
| 71 |
def _process_input_data(self, input_data):
|
| 72 |
"""Process input data and update view"""
|
| 73 |
try:
|
| 74 |
+
start_time = time.time()
|
| 75 |
+
|
|
|
|
| 76 |
self.global_settings.logger.debug(f"FindTargetsController processing input data: {input_data}")
|
| 77 |
self.organism = input_data['organism']
|
| 78 |
self.endonuclease = input_data['endonuclease']
|
| 79 |
|
| 80 |
# Get new results
|
| 81 |
+
search_start = time.time()
|
| 82 |
results = self.model.find_targets(input_data)
|
| 83 |
+
search_time = time.time() - search_start
|
| 84 |
+
self.global_settings.logger.debug(f"Time to search: {search_time:.2f} seconds")
|
| 85 |
self.global_settings.logger.debug(f"Found {len(results) if results else 0} targets")
|
| 86 |
|
| 87 |
# Update view with new results
|
| 88 |
+
view_start = time.time()
|
| 89 |
if results:
|
| 90 |
self.view.display_results(results)
|
| 91 |
+
view_time = time.time() - view_start
|
| 92 |
+
self.global_settings.logger.debug(f"Time to update view: {view_time:.2f} seconds")
|
| 93 |
|
| 94 |
+
total_time = time.time() - start_time
|
| 95 |
+
self.global_settings.logger.debug(f"Total time to process data: {total_time:.2f} seconds")
|
|
|
|
| 96 |
|
| 97 |
except Exception as e:
|
| 98 |
self.global_settings.logger.error(f"Error processing input data: {str(e)}")
|
|
|
|
| 100 |
QMessageBox.critical(self.view, "Error", f"An error occurred while processing data: {str(e)}")
|
| 101 |
|
| 102 |
def view_targets(self):
|
|
|
|
| 103 |
try:
|
| 104 |
if not self.view:
|
| 105 |
return
|
| 106 |
|
| 107 |
selected_targets = self.view.get_selected_targets()
|
| 108 |
+
print(f"Selected targets: {selected_targets}")
|
| 109 |
+
print(f"Organism: {self.organism}")
|
| 110 |
+
print(f"Endonuclease: {self.endonuclease}")
|
| 111 |
if not selected_targets:
|
| 112 |
QMessageBox.warning(self.view, "No Selection", "Please select targets to view.")
|
| 113 |
return
|
|
@@ -3,7 +3,7 @@ from PyQt6 import QtWidgets, QtCore, uic
|
|
| 3 |
from PyQt6.QtWidgets import QMainWindow
|
| 4 |
from views.HomeWindowView import HomeWindowView
|
| 5 |
from models.HomeWindowModel import HomeWindowModel
|
| 6 |
-
from utils.ui import show_error, show_message
|
| 7 |
from PyQt6.QtCore import QObject
|
| 8 |
from controllers.FindTargetsController import FindTargetsController
|
| 9 |
|
|
|
|
| 3 |
from PyQt6.QtWidgets import QMainWindow
|
| 4 |
from views.HomeWindowView import HomeWindowView
|
| 5 |
from models.HomeWindowModel import HomeWindowModel
|
| 6 |
+
from utils.ui import show_error, show_message
|
| 7 |
from PyQt6.QtCore import QObject
|
| 8 |
from controllers.FindTargetsController import FindTargetsController
|
| 9 |
|
|
@@ -1,83 +1,77 @@
|
|
| 1 |
-
import
|
| 2 |
-
from PyQt6 import
|
| 3 |
-
from PyQt6.QtWidgets import QMainWindow, QWidget, QVBoxLayout, QHBoxLayout
|
| 4 |
from views.MainWindowView import MainWindowView
|
| 5 |
from models.MainWindowModel import MainWindowModel
|
| 6 |
-
from
|
| 7 |
-
from utils.ui import show_error, show_message, scale_ui, center_ui, position_window
|
| 8 |
from utils.web import ncbi_page, repo_page, ncbi_blast_page
|
| 9 |
-
from PyQt6.QtCore import
|
| 10 |
-
import qdarktheme
|
| 11 |
from PyQt6.QtCore import QSize
|
|
|
|
| 12 |
|
| 13 |
-
class MainWindowController:
|
| 14 |
def __init__(self, global_settings):
|
|
|
|
| 15 |
self.global_settings = global_settings
|
| 16 |
-
self.
|
| 17 |
-
|
|
|
|
|
|
|
| 18 |
self.startup_controller = None
|
| 19 |
self.is_first_time_startup = self.global_settings.is_first_time_startup
|
| 20 |
-
|
| 21 |
-
# Single shared size for all regular tabs
|
| 22 |
self.shared_tab_size = QSize(850, 850)
|
| 23 |
-
# Separate size only for startup
|
| 24 |
self.startup_size = QSize(750, 550)
|
| 25 |
-
|
| 26 |
self.current_tab = None
|
| 27 |
|
| 28 |
try:
|
| 29 |
self.view = MainWindowView(global_settings)
|
| 30 |
self._setup_connections()
|
| 31 |
self._init_ui()
|
| 32 |
-
|
| 33 |
-
# Check and emit first_time_startup signal after initialization
|
| 34 |
self.global_settings.check_and_emit_first_time_startup()
|
| 35 |
except Exception as e:
|
|
|
|
| 36 |
show_error(self.global_settings, "Error initializing MainWindowController", str(e))
|
| 37 |
|
| 38 |
def _setup_connections(self):
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
# Title Bar
|
| 48 |
-
self.view.close_window_button.clicked.connect(self._close_window)
|
| 49 |
-
self.view.minimize_window_button.clicked.connect(self._minimize_window)
|
| 50 |
-
self.view.maximize_window_button.clicked.connect(self._maximize_window)
|
| 51 |
-
self.view.theme_toggle_button.clicked.connect(self._toggle_theme)
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
| 56 |
|
| 57 |
-
|
|
|
|
|
|
|
| 58 |
|
| 59 |
-
|
| 60 |
-
show_error(self.global_settings, "Error setting up connections in MainWindowController", str(e))
|
| 61 |
|
| 62 |
def _init_ui(self):
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
| 78 |
|
| 79 |
def _handle_first_time_startup(self):
|
| 80 |
-
self.
|
| 81 |
self.is_first_time_startup = True
|
| 82 |
self._open_startup_tab()
|
| 83 |
|
|
@@ -86,30 +80,25 @@ class MainWindowController:
|
|
| 86 |
self.startup_controller = self.global_settings.get_startup_window()
|
| 87 |
self.open_new_tab("Startup", self.startup_controller)
|
| 88 |
except Exception as e:
|
|
|
|
| 89 |
show_error(self.global_settings, "Error opening startup tab", str(e))
|
| 90 |
|
| 91 |
def _switch_to_home_from_startup(self):
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
self.startup_controller = None
|
| 105 |
-
else:
|
| 106 |
-
self.logger.warning("Startup tab not found when trying to close it")
|
| 107 |
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
except Exception as e:
|
| 111 |
-
self.logger.error(f"Error switching to home from startup: {str(e)}", exc_info=True)
|
| 112 |
-
show_error(self.global_settings, "Error switching to home tab", str(e))
|
| 113 |
|
| 114 |
def _center_window(self):
|
| 115 |
try:
|
|
@@ -117,43 +106,32 @@ class MainWindowController:
|
|
| 117 |
frame_geometry = self.view.frameGeometry()
|
| 118 |
frame_geometry.moveCenter(center_point)
|
| 119 |
self.view.move(frame_geometry.topLeft())
|
| 120 |
-
self.
|
| 121 |
except Exception as e:
|
| 122 |
-
self.
|
| 123 |
show_error(self.global_settings, "Error centering window", str(e))
|
| 124 |
|
| 125 |
-
def _open_home_tab(self):
|
| 126 |
-
try:
|
| 127 |
-
home_controller = self.global_settings.get_home_window()
|
| 128 |
-
self.open_new_tab("Home", home_controller)
|
| 129 |
-
except Exception as e:
|
| 130 |
-
show_error(self.global_settings, "Error opening home tab", str(e))
|
| 131 |
-
|
| 132 |
def _change_database_directory(self):
|
| 133 |
try:
|
| 134 |
new_directory = QtWidgets.QFileDialog.getExistingDirectory(
|
| 135 |
-
self.view, "Select Database Directory",
|
|
|
|
| 136 |
QtWidgets.QFileDialog.Option.ShowDirsOnly
|
| 137 |
)
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
except Exception as e:
|
| 145 |
-
self.
|
| 146 |
show_error(self.global_settings, "Error changing database directory", str(e))
|
| 147 |
|
| 148 |
-
def _process_valid_directory(self, new_directory):
|
| 149 |
-
self.global_settings.save_db_path(new_directory)
|
| 150 |
-
self.global_settings.update_db_state()
|
| 151 |
-
show_message("Success", "Database directory changed successfully.")
|
| 152 |
-
|
| 153 |
-
# If we're currently on the startup tab, switch to the home tab
|
| 154 |
-
if self.startup_controller and self.view.tab_widget.currentWidget() == self.startup_controller.view:
|
| 155 |
-
self._switch_to_home_from_startup()
|
| 156 |
-
|
| 157 |
def _handle_invalid_directory(self, new_directory, message):
|
| 158 |
reply = QtWidgets.QMessageBox.question(
|
| 159 |
self.view,
|
|
@@ -171,6 +149,19 @@ class MainWindowController:
|
|
| 171 |
else:
|
| 172 |
show_message("Operation Cancelled", "Database directory change cancelled.")
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
def _open_ncbi_website(self):
|
| 175 |
ncbi_page()
|
| 176 |
|
|
@@ -194,53 +185,69 @@ class MainWindowController:
|
|
| 194 |
|
| 195 |
def _on_tab_closed(self, widget):
|
| 196 |
"""
|
| 197 |
-
Handle the tab_closed signal
|
| 198 |
"""
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
self.
|
| 203 |
-
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
def open_new_tab(self, title, content):
|
|
|
|
| 207 |
try:
|
| 208 |
-
self.
|
| 209 |
|
| 210 |
# Check if the tab already exists
|
| 211 |
existing_tab = self.find_tab_by_title(title)
|
| 212 |
if existing_tab:
|
| 213 |
-
self.
|
| 214 |
self.view.tab_widget.setCurrentWidget(existing_tab)
|
| 215 |
self._resize_for_tab(title)
|
| 216 |
return
|
| 217 |
|
| 218 |
-
#
|
| 219 |
if hasattr(content, 'view'):
|
| 220 |
widget = content.view
|
|
|
|
|
|
|
| 221 |
else:
|
| 222 |
widget = content
|
| 223 |
|
| 224 |
-
# Create
|
| 225 |
wrapper = QWidget()
|
| 226 |
layout = QVBoxLayout(wrapper)
|
| 227 |
layout.setContentsMargins(10, 10, 10, 10)
|
| 228 |
layout.addWidget(widget)
|
| 229 |
|
| 230 |
-
# Add the wrapper to the tab widget
|
| 231 |
index = self.view.tab_widget.addTab(wrapper, title)
|
| 232 |
self.view.tab_widget.setCurrentIndex(index)
|
| 233 |
-
self.tab_widgets[title] = wrapper
|
| 234 |
|
| 235 |
self._resize_for_tab(title)
|
| 236 |
-
|
| 237 |
-
|
| 238 |
except Exception as e:
|
| 239 |
-
self.
|
| 240 |
show_error(self.global_settings, f"Error opening tab '{title}'", str(e))
|
| 241 |
|
| 242 |
-
self.view.tab_widget.currentChanged.connect(self._on_tab_changed)
|
| 243 |
-
|
| 244 |
def _resize_for_tab(self, title):
|
| 245 |
if title == "Startup":
|
| 246 |
# For Startup tab, set fixed size and disable maximize button
|
|
@@ -251,7 +258,10 @@ class MainWindowController:
|
|
| 251 |
self.view.setMinimumSize(QSize(400, 300))
|
| 252 |
self.view.setMaximumSize(QtCore.QSize(16777215, 16777215))
|
| 253 |
self.view.setWindowFlags(self.view.windowFlags() | Qt.WindowType.WindowMaximizeButtonHint)
|
| 254 |
-
|
|
|
|
|
|
|
|
|
|
| 255 |
|
| 256 |
# Ensure window flags are updated
|
| 257 |
self.view.show()
|
|
@@ -260,30 +270,35 @@ class MainWindowController:
|
|
| 260 |
self.current_tab = title
|
| 261 |
|
| 262 |
def _close_tab(self, index):
|
|
|
|
|
|
|
|
|
|
| 263 |
if 0 <= index < self.view.tab_widget.count():
|
| 264 |
-
widget = self.view.tab_widget.widget(index)
|
| 265 |
title = self.view.tab_widget.tabText(index)
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
self.logger.debug(f"Closed tab '{title}' at index {index}")
|
| 273 |
|
| 274 |
-
#
|
| 275 |
if title == "New Genome":
|
| 276 |
home_tab = self.find_tab_by_title("Home")
|
| 277 |
if home_tab:
|
| 278 |
home_controller = self.global_settings.get_home_window()
|
| 279 |
home_controller.refresh_data()
|
| 280 |
-
else:
|
| 281 |
-
self.logger.warning(f"Attempted to close non-existent tab at index {index}")
|
| 282 |
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
|
|
|
| 287 |
|
| 288 |
def _toggle_theme(self):
|
| 289 |
try:
|
|
@@ -299,7 +314,8 @@ class MainWindowController:
|
|
| 299 |
if saved_position:
|
| 300 |
self.view.move(saved_position)
|
| 301 |
else:
|
| 302 |
-
center_ui(self.view)
|
|
|
|
| 303 |
self.view.show()
|
| 304 |
self.view.apply_theme()
|
| 305 |
except Exception as e:
|
|
@@ -332,22 +348,8 @@ class MainWindowController:
|
|
| 332 |
self.logger.debug(f"Window geometry after opening New Genome tab: {self.view.geometry()}")
|
| 333 |
|
| 334 |
def find_tab_by_title(self, title):
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
return self.view.tab_widget.widget(i)
|
| 338 |
-
return None
|
| 339 |
-
|
| 340 |
-
def _on_tab_changed(self, index):
|
| 341 |
-
# Save the current size before switching if it's not the startup tab
|
| 342 |
-
if self.current_tab and self.current_tab != "Startup":
|
| 343 |
-
current_size = self.view.size()
|
| 344 |
-
if current_size.width() >= 400 and current_size.height() >= 300:
|
| 345 |
-
# Update shared size for all non-startup tabs
|
| 346 |
-
self.shared_tab_size = current_size
|
| 347 |
-
|
| 348 |
-
# Get the new tab title and resize
|
| 349 |
-
new_tab_title = self.view.tab_widget.tabText(index)
|
| 350 |
-
self._resize_for_tab(new_tab_title)
|
| 351 |
|
| 352 |
def close_new_genome_and_switch_to_home(self):
|
| 353 |
try:
|
|
|
|
| 1 |
+
from PyQt6 import QtWidgets, QtCore, QtGui
|
| 2 |
+
from PyQt6.QtWidgets import QWidget, QVBoxLayout
|
|
|
|
| 3 |
from views.MainWindowView import MainWindowView
|
| 4 |
from models.MainWindowModel import MainWindowModel
|
| 5 |
+
from utils.ui import show_error, show_message
|
|
|
|
| 6 |
from utils.web import ncbi_page, repo_page, ncbi_blast_page
|
| 7 |
+
from PyQt6.QtCore import Qt
|
|
|
|
| 8 |
from PyQt6.QtCore import QSize
|
| 9 |
+
from utils.LoggingMixin import LoggingMixin
|
| 10 |
|
| 11 |
+
class MainWindowController(LoggingMixin):
|
| 12 |
def __init__(self, global_settings):
|
| 13 |
+
LoggingMixin.__init__(self)
|
| 14 |
self.global_settings = global_settings
|
| 15 |
+
self.tab_widgets = {
|
| 16 |
+
'widgets': {},
|
| 17 |
+
'controllers': {}
|
| 18 |
+
}
|
| 19 |
self.startup_controller = None
|
| 20 |
self.is_first_time_startup = self.global_settings.is_first_time_startup
|
|
|
|
|
|
|
| 21 |
self.shared_tab_size = QSize(850, 850)
|
|
|
|
| 22 |
self.startup_size = QSize(750, 550)
|
|
|
|
| 23 |
self.current_tab = None
|
| 24 |
|
| 25 |
try:
|
| 26 |
self.view = MainWindowView(global_settings)
|
| 27 |
self._setup_connections()
|
| 28 |
self._init_ui()
|
|
|
|
|
|
|
| 29 |
self.global_settings.check_and_emit_first_time_startup()
|
| 30 |
except Exception as e:
|
| 31 |
+
self.log_error("__init__", e)
|
| 32 |
show_error(self.global_settings, "Error initializing MainWindowController", str(e))
|
| 33 |
|
| 34 |
def _setup_connections(self):
|
| 35 |
+
self.log_method_call("_setup_connections")
|
| 36 |
+
|
| 37 |
+
# menuBar
|
| 38 |
+
self.view.action_change_database_directory.triggered.connect(self._change_database_directory)
|
| 39 |
+
self.view.action_open_repository.triggered.connect(self._open_repository_website)
|
| 40 |
+
self.view.action_open_NCBI_BLAST.triggered.connect(self._open_ncbi_blast_website)
|
| 41 |
+
self.view.action_open_NCBI.triggered.connect(self._open_ncbi_website)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
+
# Title Bar
|
| 44 |
+
self.view.close_window_button.clicked.connect(self._close_window)
|
| 45 |
+
self.view.minimize_window_button.clicked.connect(self._minimize_window)
|
| 46 |
+
self.view.maximize_window_button.clicked.connect(self._maximize_window)
|
| 47 |
+
self.view.theme_toggle_button.clicked.connect(self._toggle_theme)
|
| 48 |
|
| 49 |
+
# Tab bar
|
| 50 |
+
self.view.tab_widget.tab_closed.connect(self._on_tab_closed)
|
| 51 |
+
self.view.tab_widget.tabCloseRequested.connect(self._close_tab)
|
| 52 |
|
| 53 |
+
self.global_settings.first_time_startup.connect(self._handle_first_time_startup)
|
|
|
|
| 54 |
|
| 55 |
def _init_ui(self):
|
| 56 |
+
self.log_method_call("_init_ui")
|
| 57 |
+
|
| 58 |
+
if self.is_first_time_startup:
|
| 59 |
+
self.log_info("First time startup detected. Opening startup tab.")
|
| 60 |
+
self._open_startup_tab()
|
| 61 |
+
return
|
| 62 |
+
|
| 63 |
+
db_path = self.global_settings.get_db_path()
|
| 64 |
+
is_valid, message = self.global_settings.validate_db_path(db_path)
|
| 65 |
+
|
| 66 |
+
if db_path and is_valid:
|
| 67 |
+
self.log_info(f"Database path is valid: {db_path}")
|
| 68 |
+
self._open_home_tab()
|
| 69 |
+
else:
|
| 70 |
+
self.log_warning(f"Invalid database path: {db_path}. {message}")
|
| 71 |
+
self._open_startup_tab()
|
| 72 |
|
| 73 |
def _handle_first_time_startup(self):
|
| 74 |
+
self.log_info("First time startup signal received")
|
| 75 |
self.is_first_time_startup = True
|
| 76 |
self._open_startup_tab()
|
| 77 |
|
|
|
|
| 80 |
self.startup_controller = self.global_settings.get_startup_window()
|
| 81 |
self.open_new_tab("Startup", self.startup_controller)
|
| 82 |
except Exception as e:
|
| 83 |
+
self.log_error("_open_startup_tab", e)
|
| 84 |
show_error(self.global_settings, "Error opening startup tab", str(e))
|
| 85 |
|
| 86 |
def _switch_to_home_from_startup(self):
|
| 87 |
+
self.log_method_call("_switch_to_home_from_startup")
|
| 88 |
+
|
| 89 |
+
startup_tab = self.find_tab_by_title("Startup")
|
| 90 |
+
if startup_tab:
|
| 91 |
+
index = self.view.tab_widget.indexOf(startup_tab)
|
| 92 |
+
self._close_tab(index)
|
| 93 |
+
|
| 94 |
+
if self.startup_controller:
|
| 95 |
+
self.startup_controller.deactivate()
|
| 96 |
+
self.startup_controller = None
|
| 97 |
+
else:
|
| 98 |
+
self.log_warning("Startup tab not found when trying to close it")
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
+
self.close_new_genome_and_switch_to_home()
|
| 101 |
+
self._center_window()
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
def _center_window(self):
|
| 104 |
try:
|
|
|
|
| 106 |
frame_geometry = self.view.frameGeometry()
|
| 107 |
frame_geometry.moveCenter(center_point)
|
| 108 |
self.view.move(frame_geometry.topLeft())
|
| 109 |
+
self.log_debug(f"Window centered at {self.view.pos()}")
|
| 110 |
except Exception as e:
|
| 111 |
+
self.log_error("_center_window", e)
|
| 112 |
show_error(self.global_settings, "Error centering window", str(e))
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
def _change_database_directory(self):
|
| 115 |
try:
|
| 116 |
new_directory = QtWidgets.QFileDialog.getExistingDirectory(
|
| 117 |
+
self.view, "Select Database Directory",
|
| 118 |
+
self.global_settings.get_db_path(),
|
| 119 |
QtWidgets.QFileDialog.Option.ShowDirsOnly
|
| 120 |
)
|
| 121 |
+
|
| 122 |
+
if not new_directory:
|
| 123 |
+
return
|
| 124 |
+
|
| 125 |
+
is_valid, message = self.global_settings.validate_db_path(new_directory)
|
| 126 |
+
if is_valid:
|
| 127 |
+
self._process_valid_directory(new_directory)
|
| 128 |
+
else:
|
| 129 |
+
self._handle_invalid_directory(new_directory, message)
|
| 130 |
+
|
| 131 |
except Exception as e:
|
| 132 |
+
self.log_error("_change_database_directory", e)
|
| 133 |
show_error(self.global_settings, "Error changing database directory", str(e))
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
def _handle_invalid_directory(self, new_directory, message):
|
| 136 |
reply = QtWidgets.QMessageBox.question(
|
| 137 |
self.view,
|
|
|
|
| 149 |
else:
|
| 150 |
show_message("Operation Cancelled", "Database directory change cancelled.")
|
| 151 |
|
| 152 |
+
def _process_valid_directory(self, new_directory):
|
| 153 |
+
try:
|
| 154 |
+
self.global_settings.save_db_path(new_directory)
|
| 155 |
+
self.global_settings.update_db_state()
|
| 156 |
+
show_message("Success", "Database directory changed successfully.")
|
| 157 |
+
|
| 158 |
+
if (self.startup_controller and
|
| 159 |
+
self.view.tab_widget.currentWidget() == self.startup_controller.view):
|
| 160 |
+
self._switch_to_home_from_startup()
|
| 161 |
+
except Exception as e:
|
| 162 |
+
self.log_error("_process_valid_directory", e)
|
| 163 |
+
show_error(self.global_settings, "Error processing directory", str(e))
|
| 164 |
+
|
| 165 |
def _open_ncbi_website(self):
|
| 166 |
ncbi_page()
|
| 167 |
|
|
|
|
| 185 |
|
| 186 |
def _on_tab_closed(self, widget):
|
| 187 |
"""
|
| 188 |
+
Handle the tab_closed signal from CloseableTabWidget
|
| 189 |
"""
|
| 190 |
+
try:
|
| 191 |
+
# Remove references from both widgets and controllers dictionaries
|
| 192 |
+
for title in list(self.tab_widgets['widgets'].keys()):
|
| 193 |
+
if self.tab_widgets['widgets'][title] == widget:
|
| 194 |
+
self.logger.info(f"Tab '{title}' closed. Cleaning up references.")
|
| 195 |
+
del self.tab_widgets['widgets'][title]
|
| 196 |
+
if title in self.tab_widgets['controllers']:
|
| 197 |
+
del self.tab_widgets['controllers'][title]
|
| 198 |
+
break
|
| 199 |
+
except Exception as e:
|
| 200 |
+
self.logger.error(f"Error in _on_tab_closed: {str(e)}")
|
| 201 |
+
|
| 202 |
+
def _open_home_tab(self):
|
| 203 |
+
"""Opens the home tab"""
|
| 204 |
+
try:
|
| 205 |
+
home_controller = self.global_settings.get_home_window()
|
| 206 |
+
self.open_new_tab("Home", home_controller)
|
| 207 |
+
self.log_info("Home tab opened successfully")
|
| 208 |
+
except Exception as e:
|
| 209 |
+
self.log_error("_open_home_tab", e)
|
| 210 |
+
show_error(self.global_settings, "Error opening home tab", str(e))
|
| 211 |
|
| 212 |
def open_new_tab(self, title, content):
|
| 213 |
+
"""Opens a new tab with the given title and content"""
|
| 214 |
try:
|
| 215 |
+
self.log_debug(f"Opening new tab: {title}")
|
| 216 |
|
| 217 |
# Check if the tab already exists
|
| 218 |
existing_tab = self.find_tab_by_title(title)
|
| 219 |
if existing_tab:
|
| 220 |
+
self.log_debug(f"Tab '{title}' already exists, switching to it")
|
| 221 |
self.view.tab_widget.setCurrentWidget(existing_tab)
|
| 222 |
self._resize_for_tab(title)
|
| 223 |
return
|
| 224 |
|
| 225 |
+
# Create widget from content
|
| 226 |
if hasattr(content, 'view'):
|
| 227 |
widget = content.view
|
| 228 |
+
# Store controller reference
|
| 229 |
+
self.tab_widgets['controllers'][title] = content
|
| 230 |
else:
|
| 231 |
widget = content
|
| 232 |
|
| 233 |
+
# Create wrapper widget with padding
|
| 234 |
wrapper = QWidget()
|
| 235 |
layout = QVBoxLayout(wrapper)
|
| 236 |
layout.setContentsMargins(10, 10, 10, 10)
|
| 237 |
layout.addWidget(widget)
|
| 238 |
|
| 239 |
+
# Add the wrapper to the tab widget and store reference
|
| 240 |
index = self.view.tab_widget.addTab(wrapper, title)
|
| 241 |
self.view.tab_widget.setCurrentIndex(index)
|
| 242 |
+
self.tab_widgets['widgets'][title] = wrapper
|
| 243 |
|
| 244 |
self._resize_for_tab(title)
|
| 245 |
+
self.log_info(f"Tab '{title}' opened successfully at index {index}")
|
| 246 |
+
|
| 247 |
except Exception as e:
|
| 248 |
+
self.log_error("open_new_tab", e)
|
| 249 |
show_error(self.global_settings, f"Error opening tab '{title}'", str(e))
|
| 250 |
|
|
|
|
|
|
|
| 251 |
def _resize_for_tab(self, title):
|
| 252 |
if title == "Startup":
|
| 253 |
# For Startup tab, set fixed size and disable maximize button
|
|
|
|
| 258 |
self.view.setMinimumSize(QSize(400, 300))
|
| 259 |
self.view.setMaximumSize(QtCore.QSize(16777215, 16777215))
|
| 260 |
self.view.setWindowFlags(self.view.windowFlags() | Qt.WindowType.WindowMaximizeButtonHint)
|
| 261 |
+
|
| 262 |
+
# Only resize if coming from Startup tab or if no current size is set
|
| 263 |
+
if self.current_tab == "Startup" or self.view.size() == self.startup_size:
|
| 264 |
+
self.view.resize(self.shared_tab_size)
|
| 265 |
|
| 266 |
# Ensure window flags are updated
|
| 267 |
self.view.show()
|
|
|
|
| 270 |
self.current_tab = title
|
| 271 |
|
| 272 |
def _close_tab(self, index):
|
| 273 |
+
"""
|
| 274 |
+
Handle tab closure using CloseableTabWidget
|
| 275 |
+
"""
|
| 276 |
if 0 <= index < self.view.tab_widget.count():
|
|
|
|
| 277 |
title = self.view.tab_widget.tabText(index)
|
| 278 |
+
|
| 279 |
+
# Let CloseableTabWidget handle the widget cleanup
|
| 280 |
+
self.view.tab_widget.closeTab(index)
|
| 281 |
+
|
| 282 |
+
# Clean up our references
|
| 283 |
+
if title in self.tab_widgets['widgets']:
|
| 284 |
+
del self.tab_widgets['widgets'][title]
|
| 285 |
+
if title in self.tab_widgets['controllers']:
|
| 286 |
+
del self.tab_widgets['controllers'][title]
|
| 287 |
+
|
| 288 |
self.logger.debug(f"Closed tab '{title}' at index {index}")
|
| 289 |
|
| 290 |
+
# Handle post-close operations
|
| 291 |
if title == "New Genome":
|
| 292 |
home_tab = self.find_tab_by_title("Home")
|
| 293 |
if home_tab:
|
| 294 |
home_controller = self.global_settings.get_home_window()
|
| 295 |
home_controller.refresh_data()
|
|
|
|
|
|
|
| 296 |
|
| 297 |
+
# Resize for the current tab
|
| 298 |
+
if self.view.tab_widget.count() > 0:
|
| 299 |
+
new_index = self.view.tab_widget.currentIndex()
|
| 300 |
+
new_tab_title = self.view.tab_widget.tabText(new_index)
|
| 301 |
+
self._resize_for_tab(new_tab_title)
|
| 302 |
|
| 303 |
def _toggle_theme(self):
|
| 304 |
try:
|
|
|
|
| 314 |
if saved_position:
|
| 315 |
self.view.move(saved_position)
|
| 316 |
else:
|
| 317 |
+
# center_ui(self.view)
|
| 318 |
+
pass
|
| 319 |
self.view.show()
|
| 320 |
self.view.apply_theme()
|
| 321 |
except Exception as e:
|
|
|
|
| 348 |
self.logger.debug(f"Window geometry after opening New Genome tab: {self.view.geometry()}")
|
| 349 |
|
| 350 |
def find_tab_by_title(self, title):
    """Return the widget registered under ``title``, or None if there is none."""
    registered_widgets = self.tab_widgets['widgets']
    return registered_widgets.get(title)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
|
| 354 |
def close_new_genome_and_switch_to_home(self):
|
| 355 |
try:
|
|
@@ -1,5 +1,5 @@
|
|
| 1 |
from PyQt6 import QtWidgets
|
| 2 |
-
from utils.ui import show_error, show_message
|
| 3 |
from views.PopulationAnalysisWindowView import PopulationAnalysisWindowView
|
| 4 |
from models.PopulationAnalysisWindowModel import PopulationAnalysisWindowModel
|
| 5 |
import logging
|
|
|
|
| 1 |
from PyQt6 import QtWidgets
|
| 2 |
+
from utils.ui import show_error, show_message
|
| 3 |
from views.PopulationAnalysisWindowView import PopulationAnalysisWindowView
|
| 4 |
from models.PopulationAnalysisWindowModel import PopulationAnalysisWindowModel
|
| 5 |
import logging
|
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from models.ScoringOptionsModel import ScoringOptionsModel
|
| 2 |
+
from views.ScoringOptionsView import ScoringOptionsView
|
| 3 |
+
|
| 4 |
+
class ScoringOptionsController:
    """Controller for the scoring-options window opened from View Targets.

    Connects a ``ScoringOptionsView`` to a ``ScoringOptionsModel`` and hands
    computed scores back to the owning View Targets controller through its
    ``update_scores`` method.
    """

    def __init__(self, global_settings, view_targets_controller):
        """Create the model and view and connect the view's signals.

        Args:
            global_settings: Shared settings object; its ``logger`` attribute
                is used for diagnostics.
            view_targets_controller: Controller whose ``view`` and ``model``
                are queried for the current gene and selected targets.
        """
        self.global_settings = global_settings
        self.view_targets_controller = view_targets_controller
        self.model = ScoringOptionsModel(global_settings)
        self.view = ScoringOptionsView(global_settings)

        # Connect signals
        self.view.fasta_selected.connect(self._on_fasta_selected)
        self.view.submit_clicked.connect(self._on_submit)

    def show(self):
        """Show the scoring options window"""
        self.view.show()

    def _on_fasta_selected(self, fasta_path):
        """Handle FASTA file selection.

        Looks up the gene currently selected in View Targets and asks the
        model to load ``fasta_path`` for that gene's chromosome. Any failure
        is logged and reported through the view instead of escaping the slot.
        """
        try:
            # Combo box entries look like "locus_tag: gene_name"; keep the tag.
            current_gene = self.view_targets_controller.view.combo_box_gene.currentText()
            locus_tag = current_gene.split(': ')[0]
            self.global_settings.logger.debug(f"Getting gene data for locus tag: {locus_tag}")
            gene_data = self.view_targets_controller.model.get_gene_data(locus_tag)

            if not gene_data or 'info' not in gene_data:
                self.view.show_error("Error", "Could not get chromosome information for current gene")
                return

            # Load FASTA file
            success = self.model.load_fasta(fasta_path, gene_data['info']['chromosome'])
            if not success:
                self.view.show_error("Error", "Failed to load FASTA file")
                return

        except Exception as e:
            # Mirrors _on_submit: a slot should report errors, not raise them.
            self.global_settings.logger.error(f"Error loading FASTA file: {str(e)}")
            self.view.show_error("Error", f"Error loading FASTA file: {str(e)}")

    def _on_submit(self):
        """Handle submit button click.

        Validates the FASTA path, algorithm and target selection, scores the
        selected targets, reports rejected guides, forwards the scores to
        View Targets and closes the window.
        """
        try:
            # Validate inputs
            fasta_path = self.view.get_fasta_path()
            if not fasta_path:
                self.view.show_error("Error", "Please select a FASTA file")
                return

            algorithm = self.view.get_selected_algorithm()
            if not algorithm:
                self.view.show_error("Error", "Please select a scoring algorithm")
                return

            # Get selected targets from view targets
            selected_targets = self.view_targets_controller.view.get_selected_targets()
            if not selected_targets:
                self.view.show_error("Error", "No targets selected")
                return

            # Score sequences
            scores, reject_list, guide_list = self.model.score_sequences(selected_targets, algorithm)

            if scores is None:
                self.view.show_error("Error", "Failed to score sequences")
                return

            # Report rejected sequences (reject_list holds indices into guide_list)
            if reject_list:
                rejected_seqs = "\n".join(guide_list[i] for i in reject_list)
                self.view.show_info(
                    "Sequences Not Found",
                    f"The following sequences were not found and scored as -1:\n{rejected_seqs}"
                )

            # Update scores in view targets
            self.view_targets_controller.update_scores(scores, algorithm)

            self.view.close()

        except Exception as e:
            self.global_settings.logger.error(f"Error in scoring submission: {str(e)}")
            self.view.show_error("Error", f"Error processing scores: {str(e)}")
|
|
@@ -1,11 +1,14 @@
|
|
| 1 |
import logging
|
|
|
|
| 2 |
from models.ViewTargetsModel import ViewTargetsModel
|
| 3 |
from views.ViewTargetsView import ViewTargetsView
|
| 4 |
from PyQt6.QtWidgets import QMessageBox
|
| 5 |
from utils.ui import show_error
|
| 6 |
import time
|
|
|
|
| 7 |
import traceback
|
| 8 |
import threading
|
|
|
|
| 9 |
|
| 10 |
class ViewTargetsController:
|
| 11 |
def __init__(self, global_settings):
|
|
@@ -31,6 +34,7 @@ class ViewTargetsController:
|
|
| 31 |
self.view.push_button_reset_location.clicked.connect(self.reset_location)
|
| 32 |
self.view.check_box_select_all.stateChanged.connect(self.select_all)
|
| 33 |
self.view.combo_box_gene.currentIndexChanged.connect(self.display_gene_data)
|
|
|
|
| 34 |
|
| 35 |
def load_targets(self, selected_targets, organism, endonuclease):
|
| 36 |
try:
|
|
@@ -51,6 +55,43 @@ class ViewTargetsController:
|
|
| 51 |
targets_time = time.time() - targets_start
|
| 52 |
self.global_settings.logger.debug(f"Getting targets took: {targets_time:.2f} seconds")
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
# Time displaying targets
|
| 55 |
display_start = time.time()
|
| 56 |
self.view.display_targets_in_table(targets)
|
|
@@ -76,44 +117,54 @@ class ViewTargetsController:
|
|
| 76 |
show_error(self.global_settings, "Error loading targets", str(e))
|
| 77 |
|
| 78 |
def load_gene_viewer(self):
|
|
|
|
| 79 |
try:
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
# Get
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
self.
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
-
#
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
# Update location fields if available
|
| 98 |
-
if 'info' in gene_data and 'feature_location' in gene_data['info']:
|
| 99 |
-
location = gene_data['info']['feature_location']
|
| 100 |
-
if ':' in location:
|
| 101 |
-
start, end = location.split(':')[0], location.split(':')[1].split('(')[0]
|
| 102 |
-
self.view.line_edit_start_location.setText(start)
|
| 103 |
-
self.view.line_edit_stop_location.setText(end)
|
| 104 |
-
|
| 105 |
-
# Pre-fetch next few genes in background thread
|
| 106 |
-
def prefetch_genes():
|
| 107 |
-
for gene in genes[1:5]: # Pre-fetch next 4 genes
|
| 108 |
-
self.model.get_gene_data(gene)
|
| 109 |
-
|
| 110 |
-
threading.Thread(target=prefetch_genes, daemon=True).start()
|
| 111 |
|
| 112 |
-
execution_time = time.time() - start_time
|
| 113 |
-
self.global_settings.logger.debug(f"Loading gene viewer took: {execution_time:.2f} seconds")
|
| 114 |
-
|
| 115 |
except Exception as e:
|
| 116 |
-
self.global_settings.logger.error(f"Error in load_gene_viewer: {str(e)}
|
|
|
|
| 117 |
|
| 118 |
def perform_off_target_analysis(self):
|
| 119 |
try:
|
|
@@ -142,6 +193,7 @@ class ViewTargetsController:
|
|
| 142 |
show_error(self.global_settings, "Error in cotargeting", str(e))
|
| 143 |
|
| 144 |
def highlight_gene_viewer(self):
|
|
|
|
| 145 |
try:
|
| 146 |
self.global_settings.logger.debug("Starting highlight_gene_viewer")
|
| 147 |
|
|
@@ -167,30 +219,23 @@ class ViewTargetsController:
|
|
| 167 |
|
| 168 |
# Get current gene sequence
|
| 169 |
current_gene = self.view.combo_box_gene.currentText()
|
| 170 |
-
|
|
|
|
| 171 |
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
|
|
|
| 175 |
QMessageBox.warning(self.view, "No Gene Data",
|
| 176 |
-
"Could not get gene
|
| 177 |
return
|
| 178 |
|
| 179 |
-
self.global_settings.logger.debug(f"Gene sequence length: {len(
|
| 180 |
|
| 181 |
# Highlight the sequences
|
| 182 |
if targets_to_highlight:
|
| 183 |
self.global_settings.logger.debug("Attempting to highlight sequences")
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
if highlighted_sequence:
|
| 187 |
-
self.global_settings.logger.debug("Successfully highlighted sequences")
|
| 188 |
-
self.global_settings.logger.debug(f"Highlighted sequence length: {len(highlighted_sequence)}")
|
| 189 |
-
self.view.update_gene_viewer(highlighted_sequence)
|
| 190 |
-
else:
|
| 191 |
-
self.global_settings.logger.error("Failed to highlight sequences - returned None")
|
| 192 |
-
QMessageBox.warning(self.view, "Highlighting Failed",
|
| 193 |
-
"Could not highlight the selected sequences. They may not be found in the current gene view.")
|
| 194 |
else:
|
| 195 |
self.global_settings.logger.error("No valid targets to highlight")
|
| 196 |
QMessageBox.warning(self.view, "No Valid Targets",
|
|
@@ -225,15 +270,23 @@ class ViewTargetsController:
|
|
| 225 |
show_error(self.global_settings, "Error showing filter options", str(e))
|
| 226 |
|
| 227 |
def show_scoring_options(self):
|
|
|
|
| 228 |
try:
|
| 229 |
-
|
| 230 |
-
self
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
except Exception as e:
|
| 236 |
-
|
|
|
|
|
|
|
| 237 |
|
| 238 |
def change_indices(self):
|
| 239 |
try:
|
|
@@ -287,3 +340,196 @@ class ViewTargetsController:
|
|
| 287 |
|
| 288 |
def show(self):
    """Show the window by delegating to ``self.view.show()``."""
    self.view.show()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import logging
|
| 2 |
+
from controllers.ScoringOptionsController import ScoringOptionsController
|
| 3 |
from models.ViewTargetsModel import ViewTargetsModel
|
| 4 |
from views.ViewTargetsView import ViewTargetsView
|
| 5 |
from PyQt6.QtWidgets import QMessageBox
|
| 6 |
from utils.ui import show_error
|
| 7 |
import time
|
| 8 |
+
from PyQt6 import QtWidgets, QtCore
|
| 9 |
import traceback
|
| 10 |
import threading
|
| 11 |
+
from Bio.Seq import Seq
|
| 12 |
|
| 13 |
class ViewTargetsController:
|
| 14 |
def __init__(self, global_settings):
|
|
|
|
| 34 |
self.view.push_button_reset_location.clicked.connect(self.reset_location)
|
| 35 |
self.view.check_box_select_all.stateChanged.connect(self.select_all)
|
| 36 |
self.view.combo_box_gene.currentIndexChanged.connect(self.display_gene_data)
|
| 37 |
+
self.view.gene_selected.connect(self.on_gene_selected)
|
| 38 |
|
| 39 |
def load_targets(self, selected_targets, organism, endonuclease):
|
| 40 |
try:
|
|
|
|
| 55 |
targets_time = time.time() - targets_start
|
| 56 |
self.global_settings.logger.debug(f"Getting targets took: {targets_time:.2f} seconds")
|
| 57 |
|
| 58 |
+
# Get feature ID mapping from FindTargetsModel - Optimized with timing
|
| 59 |
+
genes_start = time.time()
|
| 60 |
+
|
| 61 |
+
# Time set creation
|
| 62 |
+
set_start = time.time()
|
| 63 |
+
seen_genes = set()
|
| 64 |
+
formatted_genes = []
|
| 65 |
+
set_time = time.time() - set_start
|
| 66 |
+
self.global_settings.logger.debug(f"Set initialization took: {set_time:.2f} seconds")
|
| 67 |
+
|
| 68 |
+
# Time target processing
|
| 69 |
+
process_start = time.time()
|
| 70 |
+
for target in selected_targets:
|
| 71 |
+
gene_name = target.get('feature_name')
|
| 72 |
+
feature_id = target.get('feature_id')
|
| 73 |
+
|
| 74 |
+
if gene_name and feature_id and gene_name not in seen_genes:
|
| 75 |
+
seen_genes.add(gene_name)
|
| 76 |
+
formatted_genes.append(f"{feature_id}: {gene_name}")
|
| 77 |
+
process_time = time.time() - process_start
|
| 78 |
+
self.global_settings.logger.debug(f"Target processing took: {process_time:.2f} seconds")
|
| 79 |
+
|
| 80 |
+
# Time sorting
|
| 81 |
+
sort_start = time.time()
|
| 82 |
+
formatted_genes.sort()
|
| 83 |
+
sort_time = time.time() - sort_start
|
| 84 |
+
self.global_settings.logger.debug(f"Sorting took: {sort_time:.2f} seconds")
|
| 85 |
+
|
| 86 |
+
# Time view update
|
| 87 |
+
view_start = time.time()
|
| 88 |
+
self.view.set_combo_box_gene(formatted_genes)
|
| 89 |
+
view_time = time.time() - view_start
|
| 90 |
+
self.global_settings.logger.debug(f"View update took: {view_time:.2f} seconds")
|
| 91 |
+
|
| 92 |
+
genes_time = time.time() - genes_start
|
| 93 |
+
self.global_settings.logger.debug(f"Total setting genes took: {genes_time:.2f} seconds")
|
| 94 |
+
|
| 95 |
# Time displaying targets
|
| 96 |
display_start = time.time()
|
| 97 |
self.view.display_targets_in_table(targets)
|
|
|
|
| 117 |
show_error(self.global_settings, "Error loading targets", str(e))
|
| 118 |
|
| 119 |
def load_gene_viewer(self):
    """Fill the gene viewer and location fields for the selected gene.

    Reads the current combo-box entry, fetches its sequence data from the
    model and logs how long each step took. Errors are logged, not raised.
    """
    try:
        log = self.global_settings.logger
        began = time.time()

        # Step 1: read the combo box selection.
        t0 = time.time()
        selected_text = self.view.combo_box_gene.currentText()
        if not selected_text:
            log.debug("No gene selected")
            return
        combo_time = time.time() - t0
        log.debug(f"Combo box access time: {combo_time:.2f} seconds")

        # Step 2: entries are formatted "locus_tag: gene_name"; keep the tag.
        t0 = time.time()
        if ': ' in selected_text:
            locus_tag = selected_text.split(': ')[0]
        else:
            locus_tag = selected_text
        log.debug(f"Loading sequence for locus tag: {locus_tag}")
        parse_time = time.time() - t0
        log.debug(f"Locus tag parsing time: {parse_time:.2f} seconds")

        # Step 3: fetch the sequence data from the model.
        t0 = time.time()
        sequence_data = self.model.get_gene_sequence(locus_tag)
        sequence_time = time.time() - t0
        log.debug(f"Sequence retrieval time: {sequence_time:.2f} seconds")

        if not sequence_data:
            log.warning(f"No sequence data found for locus tag {locus_tag}")
            return

        # Step 4: push the sequence text into the viewer.
        t0 = time.time()
        self.view.set_text_edit_gene_viewer(sequence_data['sequence'])
        viewer_time = time.time() - t0
        log.debug(f"Text viewer update time: {viewer_time:.2f} seconds")

        # Step 5: show the start/end positions.
        t0 = time.time()
        self.view.line_edit_start_location.setText(str(sequence_data['start']))
        self.view.line_edit_stop_location.setText(str(sequence_data['end']))
        location_time = time.time() - t0
        log.debug(f"Location fields update time: {location_time:.2f} seconds")

        total_time = time.time() - began
        log.debug(f"Total gene viewer loading took: {total_time:.2f} seconds")

    except Exception as e:
        self.global_settings.logger.error(f"Error in load_gene_viewer: {str(e)}")
        self.global_settings.logger.error(f"Stack trace: {traceback.format_exc()}")
|
| 168 |
|
| 169 |
def perform_off_target_analysis(self):
|
| 170 |
try:
|
|
|
|
| 193 |
show_error(self.global_settings, "Error in cotargeting", str(e))
|
| 194 |
|
| 195 |
def highlight_gene_viewer(self):
|
| 196 |
+
"""Highlight selected targets in gene viewer"""
|
| 197 |
try:
|
| 198 |
self.global_settings.logger.debug("Starting highlight_gene_viewer")
|
| 199 |
|
|
|
|
| 219 |
|
| 220 |
# Get current gene sequence
|
| 221 |
current_gene = self.view.combo_box_gene.currentText()
|
| 222 |
+
locus_tag = current_gene.split(': ')[0] if ': ' in current_gene else current_gene
|
| 223 |
+
self.global_settings.logger.debug(f"Getting sequence for locus tag: {locus_tag}")
|
| 224 |
|
| 225 |
+
# Get gene sequence with padding
|
| 226 |
+
sequence_data = self.model.get_gene_sequence(locus_tag)
|
| 227 |
+
if not sequence_data or 'sequence' not in sequence_data:
|
| 228 |
+
self.global_settings.logger.error("No sequence data found")
|
| 229 |
QMessageBox.warning(self.view, "No Gene Data",
|
| 230 |
+
"Could not get gene sequence for highlighting.")
|
| 231 |
return
|
| 232 |
|
| 233 |
+
self.global_settings.logger.debug(f"Gene sequence length: {len(sequence_data['sequence'])}")
|
| 234 |
|
| 235 |
# Highlight the sequences
|
| 236 |
if targets_to_highlight:
|
| 237 |
self.global_settings.logger.debug("Attempting to highlight sequences")
|
| 238 |
+
self.highlight_targets_in_gene_viewer(targets_to_highlight)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
else:
|
| 240 |
self.global_settings.logger.error("No valid targets to highlight")
|
| 241 |
QMessageBox.warning(self.view, "No Valid Targets",
|
|
|
|
| 270 |
show_error(self.global_settings, "Error showing filter options", str(e))
|
| 271 |
|
| 272 |
def show_scoring_options(self):
    """Open the scoring options window, creating its controller on first use."""
    try:
        if hasattr(self, '_scoring_options_controller'):
            scoring_controller = self._scoring_options_controller
        else:
            # First request: build the controller, passing this controller
            # as its view_targets_controller, and keep it for later calls.
            scoring_controller = ScoringOptionsController(
                global_settings=self.global_settings,
                view_targets_controller=self,
            )
            self._scoring_options_controller = scoring_controller

        scoring_controller.show()

    except Exception as e:
        log = self.global_settings.logger
        log.error(f"Error showing scoring options: {str(e)}")
        log.error(f"Stack trace: {traceback.format_exc()}")
        show_error(self.global_settings, "Error", f"Could not show scoring options: {str(e)}")
|
| 290 |
|
| 291 |
def change_indices(self):
|
| 292 |
try:
|
|
|
|
| 340 |
|
| 341 |
def show(self):
    """Show the window by delegating to ``self.view.show()``."""
    self.view.show()
|
| 343 |
+
|
| 344 |
+
def on_gene_selected(self, selected_text):
    """React to a gene selection by refreshing the viewer and location fields.

    ``selected_text`` is expected in "locus_tag: gene_name" form; text without
    the ": " separator is used as the locus tag as-is. Errors are logged and
    not re-raised.
    """
    try:
        log = self.global_settings.logger

        # Keep only the locus tag part of the entry.
        if ': ' in selected_text:
            locus_tag = selected_text.split(': ')[0]
        else:
            locus_tag = selected_text
        log.debug(f"Loading sequence for locus tag: {locus_tag}")

        sequence_data = self.model.get_gene_sequence(locus_tag)

        if not sequence_data:
            # Nothing to show: put a placeholder in the viewer and blank the fields.
            log.warning(f"No sequence data found for locus tag {locus_tag}")
            self.view.set_text_edit_gene_viewer("No sequence data available for this gene")
            self.view.line_edit_start_location.clear()
            self.view.line_edit_stop_location.clear()
            return

        # Show the sequence followed by its start/end positions.
        self.view.set_text_edit_gene_viewer(sequence_data['sequence'])
        self.view.line_edit_start_location.setText(str(sequence_data['start']))
        self.view.line_edit_stop_location.setText(str(sequence_data['end']))

        log.debug(f"Updated gene viewer with sequence of length: {len(sequence_data['sequence'])}")

    except Exception as e:
        self.global_settings.logger.error(f"Error handling gene selection: {str(e)}")
        self.global_settings.logger.error(f"Stack trace: {traceback.format_exc()}")
|
| 371 |
+
|
| 372 |
+
def highlight_targets_in_gene_viewer(self, targets_to_highlight=None):
    """Highlight selected targets in gene viewer.

    Each target's sequence ('-' strand targets are reverse-complemented
    first) is searched case-insensitively in the current gene's sequence.
    The first occurrence is wrapped in an HTML ``<span>``: red background
    for '-' strand, green otherwise. Overlapping hits are clipped so every
    base of the gene sequence appears exactly once in the output.

    Args:
        targets_to_highlight: Iterable of dicts with 'sequence' and 'strand'
            keys. When None, the view's currently selected targets are used.

    Errors are logged and not re-raised.
    """
    try:
        logger = self.global_settings.logger
        logger.debug("Starting highlight_gene_viewer")

        # Get selected targets if none provided
        if targets_to_highlight is None:
            targets_to_highlight = self.view.get_selected_targets()

        logger.debug(f"Selected targets: {targets_to_highlight}")

        if not targets_to_highlight:
            QMessageBox.warning(self.view, "No Selection",
                                "Please select targets to highlight in the gene viewer.")
            return

        # Get current gene sequence ("locus_tag: gene_name" -> locus tag)
        selected_text = self.view.combo_box_gene.currentText()
        locus_tag = selected_text.split(': ')[0]

        sequence_data = self.model.get_gene_sequence(locus_tag)
        if not sequence_data or 'sequence' not in sequence_data:
            logger.error("No sequence data available for highlighting")
            return

        sequence = sequence_data['sequence']
        # Upper-case once; the search below is case-insensitive.
        sequence_upper = sequence.upper()

        highlights = []
        total_sequences = len(targets_to_highlight)

        for target in targets_to_highlight:
            logger.debug(f"Processing target: {target}")
            sequence_to_find = target['sequence']
            strand = target['strand']

            # For negative strand, we need to use reverse complement
            if strand == '-':
                sequence_to_find = str(Seq(sequence_to_find).reverse_complement())
                logger.debug(f"Reverse complemented sequence: {sequence_to_find}")

            target_upper = sequence_to_find.upper()
            logger.debug(f"Searching for sequence: {target_upper}")

            # Only the first occurrence of each target is highlighted.
            pos = sequence_upper.find(target_upper)
            if pos != -1:
                logger.debug(f"Found sequence at position: {pos}")
                color = 'red' if strand == '-' else 'green'
                highlights.append((pos, len(sequence_to_find), color))
            else:
                logger.debug(f"Sequence not found: {target_upper}")

        sequences_found = len(highlights)

        # Only show warning if NO sequences were found
        if sequences_found == 0:
            logger.warning("No sequences could be highlighted")
            QMessageBox.warning(self.view, "Highlighting Failed",
                                "Could not highlight any of the selected sequences in the current gene view.")
            return

        logger.debug(f"Found {sequences_found} out of {total_sequences} sequences to highlight")

        # Build highlighted sequence, walking the hits left to right.
        result = []
        last_pos = 0
        for pos, length, color in sorted(highlights):
            end = pos + length
            if end <= last_pos:
                # Entirely inside an earlier highlight (e.g. a duplicate target).
                continue
            # Clip overlapping hits so already-emitted bases are not repeated.
            start = max(pos, last_pos)
            result.append(sequence[last_pos:start])
            result.append(f"<span style='background-color: {color};'>")
            result.append(sequence[start:end])
            result.append("</span>")
            last_pos = end

        result.append(sequence[last_pos:])
        highlighted_sequence = ''.join(result)

        # Update the view with highlighted sequence
        self.view.update_gene_viewer(highlighted_sequence)
        logger.debug(f"Successfully highlighted {sequences_found} sequences")

    except Exception as e:
        self.global_settings.logger.error(f"Error highlighting targets: {str(e)}")
        self.global_settings.logger.error(f"Stack trace: {traceback.format_exc()}")
|
| 459 |
+
|
| 460 |
+
def update_scores(self, scores, algorithm):
    """Update the table with new scores from alternative scoring methods.

    Scores are paired, in order, with the currently selected table rows
    (sorted ascending). A score of -1 leaves its row untouched. If no column
    headed ``algorithm`` exists yet, one is inserted directly after "Score".

    Args:
        scores: Sequence of numeric scores, one per selected row.
        algorithm: Column header under which the scores are written.

    Raises:
        Exception: Any failure is logged and re-raised to the caller
            (e.g. ValueError when the table has no "Score" column).
    """
    try:
        logger = self.global_settings.logger
        table = self.view.table_targets

        # Get current table headers
        headers = self.view.get_table_headers()

        # Get selected rows
        selected_rows = sorted({index.row() for index in table.selectedIndexes()})
        if not selected_rows:
            logger.warning("No rows selected for scoring")
            return

        # Determine the position for the new column (after the "Score" column)
        desired_index = headers.index("Score") + 1

        # Suspend repaints while the table is modified.
        table.setUpdatesEnabled(False)

        try:
            # Add new column for algorithm if it doesn't exist
            if algorithm not in headers:
                # insertColumn adds an empty column and shifts the existing
                # columns right, so no manual move of later cells is needed.
                table.insertColumn(desired_index)
                table.setHorizontalHeaderItem(
                    desired_index,
                    QtWidgets.QTableWidgetItem(algorithm)
                )
                col_index = desired_index
            else:
                col_index = headers.index(algorithm)

            # Update scores in the table for selected rows only
            for row, score in zip(selected_rows, scores):
                if score == -1:
                    continue

                score_item = QtWidgets.QTableWidgetItem()
                # Round to 2 decimal places
                rounded_score = round(float(score), 2)
                score_item.setData(QtCore.Qt.ItemDataRole.EditRole, rounded_score)
                table.setItem(row, col_index, score_item)

                # Also update the target data to preserve score during filtering/sorting
                # NOTE(review): the key is 'azimuth_score' for every algorithm and the
                # table row is used as the index into _all_results - confirm both.
                if hasattr(self.view, '_all_results'):
                    try:
                        self.view._all_results[row]['azimuth_score'] = rounded_score
                    except (IndexError, KeyError):
                        # Don't abort a half-applied table update over a cache miss.
                        logger.warning(f"No cached result for table row {row}; score not stored")

            # Resize columns to fit new content
            table.resizeColumnsToContents()

            logger.debug(f"Updated scores for algorithm: {algorithm}")
            logger.debug(f"Updated rows: {selected_rows}")

        finally:
            # Re-enable updates
            table.setUpdatesEnabled(True)

    except Exception as e:
        self.global_settings.logger.error(f"Error updating scores: {str(e)}")
        raise
|
|
@@ -3,6 +3,9 @@ from Bio import SeqIO
|
|
| 3 |
import os
|
| 4 |
import traceback
|
| 5 |
from functools import lru_cache
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
class AnnotationParser:
|
| 8 |
def __init__(self, global_settings):
|
|
@@ -10,36 +13,129 @@ class AnnotationParser:
|
|
| 10 |
self.logger = global_settings.get_logger()
|
| 11 |
self.annotation_file_name = ""
|
| 12 |
self.available_genes = []
|
| 13 |
-
self._feature_cache = {}
|
| 14 |
-
self._record_cache = {}
|
| 15 |
-
self.gene_cache = {}
|
|
|
|
| 16 |
|
| 17 |
def set_annotation_file(self, file_path):
|
| 18 |
-
|
| 19 |
-
self.annotation_file_name
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
#
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
return
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
def genbank_search(self, queries):
|
|
|
|
| 43 |
try:
|
| 44 |
if not self.annotation_file_name:
|
| 45 |
raise ValueError("Annotation file not set")
|
|
@@ -49,39 +145,86 @@ class AnnotationParser:
|
|
| 49 |
|
| 50 |
# Convert queries to lowercase set for faster lookup
|
| 51 |
queries = {q.lower() for q in queries}
|
|
|
|
| 52 |
|
| 53 |
-
#
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
#
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
|
|
|
| 74 |
|
| 75 |
-
# Check if any query matches
|
| 76 |
-
if any(query in searchable_text for query in queries):
|
| 77 |
-
results_list.append((record.id, feature))
|
| 78 |
-
|
| 79 |
-
self.logger.debug(f"Found {len(results_list)} results")
|
| 80 |
return results_list
|
|
|
|
| 81 |
except Exception as e:
|
| 82 |
self.logger.error(f"Error in genbank_search: {str(e)}")
|
| 83 |
raise
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
def get_max_chrom(self):
|
| 86 |
try:
|
| 87 |
parser = SeqIO.parse(self.annotation_file_name, 'genbank')
|
|
@@ -153,59 +296,9 @@ class AnnotationParser:
|
|
| 153 |
def get_available_genes(self):
|
| 154 |
return self.available_genes
|
| 155 |
|
| 156 |
-
def
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
"""
|
| 160 |
-
try:
|
| 161 |
-
self.logger.debug(f"AnnotationParser.get_gene_data called with identifier: {gene_identifier}")
|
| 162 |
-
|
| 163 |
-
if not gene_identifier:
|
| 164 |
-
self.logger.warning("Empty gene identifier provided")
|
| 165 |
-
return None
|
| 166 |
-
|
| 167 |
-
# Handle numeric gene identifiers
|
| 168 |
-
if isinstance(gene_identifier, int) or str(gene_identifier).isdigit():
|
| 169 |
-
if self.available_genes:
|
| 170 |
-
gene_identifier = self.available_genes[0]
|
| 171 |
-
else:
|
| 172 |
-
return None
|
| 173 |
-
|
| 174 |
-
# Check main cache first
|
| 175 |
-
cache_key = f"gene_data_{gene_identifier}"
|
| 176 |
-
if cache_key in self._feature_cache:
|
| 177 |
-
return self._feature_cache[cache_key]
|
| 178 |
-
|
| 179 |
-
# Get cached records
|
| 180 |
-
records = self._get_records()
|
| 181 |
-
if not records:
|
| 182 |
-
return None
|
| 183 |
-
|
| 184 |
-
# Use gene index if available
|
| 185 |
-
if not hasattr(self, '_gene_index'):
|
| 186 |
-
self._build_gene_index(records)
|
| 187 |
-
|
| 188 |
-
# Try to get location from index
|
| 189 |
-
if gene_identifier in self._gene_index:
|
| 190 |
-
record_id, feature = self._gene_index[gene_identifier]
|
| 191 |
-
for record in records:
|
| 192 |
-
if record.id == record_id:
|
| 193 |
-
sequence = str(feature.extract(record.seq))
|
| 194 |
-
feature_info = self._get_feature_info(feature)
|
| 195 |
-
|
| 196 |
-
result = {
|
| 197 |
-
'sequence': sequence,
|
| 198 |
-
'info': feature_info
|
| 199 |
-
}
|
| 200 |
-
|
| 201 |
-
self._feature_cache[cache_key] = result
|
| 202 |
-
return result
|
| 203 |
-
|
| 204 |
-
return None
|
| 205 |
-
|
| 206 |
-
except Exception as e:
|
| 207 |
-
self.logger.error(f"Error in get_gene_data: {str(e)}")
|
| 208 |
-
return None
|
| 209 |
|
| 210 |
def _build_gene_index(self, records):
|
| 211 |
"""Build an index of genes for faster lookup"""
|
|
@@ -233,6 +326,3 @@ class AnnotationParser:
|
|
| 233 |
except Exception as e:
|
| 234 |
self.logger.error(f"Error parsing available genes: {str(e)}")
|
| 235 |
|
| 236 |
-
def get_full_gene_sequence(self):
|
| 237 |
-
# Implement this method if needed
|
| 238 |
-
pass
|
|
|
|
| 3 |
import os
|
| 4 |
import traceback
|
| 5 |
from functools import lru_cache
|
| 6 |
+
import json
|
| 7 |
+
import pickle
|
| 8 |
+
import time
|
| 9 |
|
| 10 |
class AnnotationParser:
|
| 11 |
def __init__(self, global_settings):
|
|
|
|
| 13 |
self.logger = global_settings.get_logger()
|
| 14 |
self.annotation_file_name = ""
|
| 15 |
self.available_genes = []
|
| 16 |
+
self._feature_cache = {}
|
| 17 |
+
self._record_cache = {}
|
| 18 |
+
self.gene_cache = {}
|
| 19 |
+
self.index_file = None
|
| 20 |
|
| 21 |
def set_annotation_file(self, file_path):
    """Point the parser at a GenBank annotation file and prepare its index.

    Nothing happens when ``file_path`` is already the configured file.
    Otherwise the path is recorded, everything derived from the previous
    file is discarded, and the companion ``<file_path>.index`` file is loaded
    with ``_load_index()`` or, when that reports failure, rebuilt with
    ``_create_index()``.

    Args:
        file_path: Path of the GenBank annotation file.

    Raises:
        Exception: Any error raised while switching files is logged and
            re-raised unchanged.
    """
    try:
        if self.annotation_file_name == file_path:
            return

        self.annotation_file_name = file_path
        self.logger.debug(f"Set annotation file to: {file_path}")

        # Drop data that belongs to the previous file.  Without this, a failed
        # load + rebuild would leave the old index in place and later searches
        # would silently answer from the wrong genome.
        if hasattr(self, '_index'):
            del self._index
        self._feature_cache = {}
        self._record_cache = {}

        # Set index file path
        self.index_file = f"{file_path}.index"

        # Load or create index
        index_start = time.time()
        if not self._load_index():
            self.logger.debug("Index not found or outdated, creating new index...")
            create_start = time.time()
            created = self._create_index()
            create_time = time.time() - create_start
            self.logger.debug(f"Index creation time: {create_time:.2f} seconds")
            if not created:
                # _create_index() reports failure through its return value;
                # make that visible instead of ignoring it.
                self.logger.error(f"No index available for annotation file: {file_path}")
        index_time = time.time() - index_start
        self.logger.debug(f"Total index handling time: {index_time:.2f} seconds")

    except Exception as e:
        self.logger.error(f"Error in set_annotation_file: {str(e)}")
        raise
|
| 46 |
+
|
| 47 |
+
def _create_index(self):
    """Build the gene index from the GenBank file and pickle it to ``self.index_file``.

    The index is a dict with two sections:

    * ``'locus_tags'`` maps a lower-cased feature id to an entry describing
      one ``CDS``/``gene`` feature (record id, type, location, strand, names,
      description and qualifiers with list values reduced to their first item);
    * ``'sequences'`` maps a record id to that record's sequence as a string.

    Features whose id is empty or "n/a" (any case) are counted but not stored.
    When several features share an id, the one read last is kept.

    Returns:
        bool: True once the index has been written and stored in
        ``self._index``; False if anything raised (the error is logged).
    """
    try:
        began = time.time()
        self.logger.debug("Creating gene index file...")

        # Initialize index structure
        locus_tags = {}  # Only store by locus_tag
        sequences = {}   # Keep sequences for quick access
        index_data = {'locus_tags': locus_tags, 'sequences': sequences}

        n_records = 0
        n_features = 0

        for n_records, record in enumerate(SeqIO.parse(self.annotation_file_name, "genbank"), start=1):
            record_began = time.time()

            # Store sequence information first
            sequences[record.id] = str(record.seq)

            for feature in record.features:
                if feature.type not in ('CDS', 'gene'):
                    continue

                n_features += 1
                info = self._get_feature_info(feature)
                tag = info['feature_id']

                # Only create a feature entry if we have a valid locus_tag
                if not tag or tag.lower() == "n/a":
                    continue

                location_text = self._get_feature_location(feature)
                strand_symbol = '+' if feature.location.strand == 1 else '-'
                flat_qualifiers = {}
                for name, value in feature.qualifiers.items():
                    flat_qualifiers[name] = value[0] if isinstance(value, list) else value

                # Index only by locus_tag (lowercase for case-insensitive lookup)
                locus_tags[tag.lower()] = {
                    'record_id': record.id,
                    'feature_type': feature.type,
                    'chromosome': record.id,
                    'location': location_text,
                    'strand': strand_symbol,
                    'locus_tag': tag,
                    'gene_name': info['feature_name'],
                    'description': info['feature_description'],
                    'qualifiers': flat_qualifiers,
                }

            record_elapsed = time.time() - record_began
            if n_records % 100 == 0:
                self.logger.debug(f"Processed {n_records} records, {n_features} features. Last record time: {record_elapsed:.2f}s")

        # Save index to file
        save_began = time.time()
        with open(self.index_file, 'wb') as handle:
            pickle.dump(index_data, handle)
        save_elapsed = time.time() - save_began

        total_elapsed = time.time() - began

        self._index = index_data

        self.logger.debug(f"Index creation complete. Records: {n_records}, Features: {n_features}")
        self.logger.debug(f"Save time: {save_elapsed:.2f}s, Total time: {total_elapsed:.2f}s")
        return True

    except Exception as e:
        self.logger.error(f"Error creating index: {str(e)}")
        return False
|
| 115 |
|
| 116 |
+
def _load_index(self):
    """Load the pickled gene index if it exists, is current and has the expected layout.

    The index is rejected (so the caller can rebuild it) when the file is
    missing, is older than the GenBank file, or does not unpickle to a dict
    holding dict-valued ``'locus_tags'`` and ``'sequences'`` sections, which
    is the layout the search methods of this class read.

    Returns:
        bool: True when ``self._index`` was populated from the file, False
        otherwise.  Errors are logged, never raised.
    """
    try:
        if not os.path.exists(self.index_file):
            return False

        # Check if index is older than GenBank file
        if os.path.getmtime(self.index_file) < os.path.getmtime(self.annotation_file_name):
            return False

        start_time = time.time()
        # NOTE(security): pickle.load can execute arbitrary code.  Only index
        # files written by this program should ever sit next to the data.
        with open(self.index_file, 'rb') as f:
            loaded = pickle.load(f)

        # An index written in another layout unpickles fine but would make
        # every later lookup fail; treat it as "no index" instead.
        layout_ok = (
            isinstance(loaded, dict)
            and isinstance(loaded.get('locus_tags'), dict)
            and isinstance(loaded.get('sequences'), dict)
        )
        if not layout_ok:
            self.logger.warning("Index file has an unexpected layout and will be rebuilt")
            return False

        self._index = loaded
        load_time = time.time() - start_time
        self.logger.debug(f"Index file loaded successfully in {load_time:.2f} seconds")
        return True

    except Exception as e:
        self.logger.error(f"Error loading index: {str(e)}")
        return False
|
| 136 |
|
| 137 |
def genbank_search(self, queries):
|
| 138 |
+
"""Search using the index file for better performance"""
|
| 139 |
try:
|
| 140 |
if not self.annotation_file_name:
|
| 141 |
raise ValueError("Annotation file not set")
|
|
|
|
| 145 |
|
| 146 |
# Convert queries to lowercase set for faster lookup
|
| 147 |
queries = {q.lower() for q in queries}
|
| 148 |
+
print(f"Search queries: {queries}")
|
| 149 |
|
| 150 |
+
# Search through index
|
| 151 |
+
if hasattr(self, '_index'):
|
| 152 |
+
# Search through all features
|
| 153 |
+
for feature_key, feature_entry in self._index['locus_tags'].items():
|
| 154 |
+
# Check gene name, locus tag, and description
|
| 155 |
+
searchable_text = ' '.join([
|
| 156 |
+
feature_entry['gene_name'].lower(),
|
| 157 |
+
feature_entry['locus_tag'].lower(),
|
| 158 |
+
feature_entry['description'].lower(),
|
| 159 |
+
# Also search through qualifiers
|
| 160 |
+
*[str(v).lower() for v in feature_entry['qualifiers'].values()]
|
| 161 |
+
])
|
| 162 |
+
|
| 163 |
+
# Check if any query matches
|
| 164 |
+
if any(query in searchable_text for query in queries):
|
| 165 |
+
info = {
|
| 166 |
+
'feature_id': feature_entry['locus_tag'],
|
| 167 |
+
'feature_name': feature_entry['gene_name'],
|
| 168 |
+
'feature_location': feature_entry['location'],
|
| 169 |
+
'feature_description': feature_entry['description']
|
| 170 |
+
}
|
| 171 |
+
results_list.append((feature_entry['record_id'], info))
|
| 172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
return results_list
|
| 174 |
+
|
| 175 |
except Exception as e:
|
| 176 |
self.logger.error(f"Error in genbank_search: {str(e)}")
|
| 177 |
raise
|
| 178 |
|
| 179 |
+
def get_gene_data(self, gene_identifier):
    """Look up one gene in the loaded index.

    The identifier is converted to a string, stripped and lower-cased, then
    matched against the keys of the index's ``'locus_tags'`` section: first
    directly, then by comparing every key lower-cased.

    Args:
        gene_identifier: Identifier of the gene; falsy values give ``None``.

    Returns:
        dict | None: ``{'sequence': <sequence stored for the entry's record>,
        'info': <index entry>}`` for a match; ``None`` when nothing matches,
        no index is loaded, or an error occurred (the error is logged).
    """
    try:
        if not gene_identifier:
            return None

        # Ensure string conversion and proper formatting
        wanted = str(gene_identifier).strip().lower()

        if not hasattr(self, '_index'):
            return None

        def bundle(entry):
            # Pair an index entry with the sequence stored for its record.
            return {
                'sequence': self._index['sequences'][entry['record_id']],
                'info': entry
            }

        by_tag = self._index['locus_tags']

        # Try exact match first
        if wanted in by_tag:
            return bundle(by_tag[wanted])

        # Try case-insensitive match
        for tag, entry in by_tag.items():
            if str(tag).lower() == wanted:
                return bundle(entry)

        return None

    except Exception as e:
        self.logger.error(f"Error in get_gene_data: {str(e)}")
        return None
|
| 212 |
+
|
| 213 |
+
def _get_records(self):
    """Return all records of the annotation file, parsing it only when needed.

    Records are kept in ``self._record_cache``; the file is parsed only while
    that cache is empty.  The former ``lru_cache`` decorator was removed: it
    memoised per instance, so the empty list returned after a read error was
    replayed forever, a later change of file was never seen, and the cache
    kept the instance alive.

    Returns:
        list: The parsed records, or an empty list when the file could not
        be read (the error is logged and nothing is cached, so the next call
        tries again).
    """
    if self._record_cache:
        return self._record_cache

    start_time = time.time()
    try:
        self.logger.debug("Loading records from file...")
        self._record_cache = list(SeqIO.parse(self.annotation_file_name, "genbank"))
        load_time = time.time() - start_time
        self.logger.debug(f"Time to load records: {load_time:.2f} seconds")
    except Exception as e:
        self.logger.error(f"Error reading annotation file: {str(e)}")
        return []
    return self._record_cache
|
| 227 |
+
|
| 228 |
def get_max_chrom(self):
|
| 229 |
try:
|
| 230 |
parser = SeqIO.parse(self.annotation_file_name, 'genbank')
|
|
|
|
| 296 |
def get_available_genes(self):
|
| 297 |
return self.available_genes
|
| 298 |
|
| 299 |
+
def get_full_gene_sequence(self):
    """Placeholder that is not implemented yet; does nothing and returns ``None``."""
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
|
| 303 |
def _build_gene_index(self, records):
|
| 304 |
"""Build an index of genes for faster lookup"""
|
|
|
|
| 326 |
except Exception as e:
|
| 327 |
self.logger.error(f"Error parsing available genes: {str(e)}")
|
| 328 |
|
|
|
|
|
|
|
|
|
|
@@ -2,101 +2,193 @@ from utils.sequence_utils import SeqTranslate
|
|
| 2 |
import logging
|
| 3 |
from multiprocessing import Pool, cpu_count
|
| 4 |
from functools import partial
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
class CSPRparser:
|
| 7 |
def __init__(self, inputFileName, casper_info_path):
|
| 8 |
self.fileName = inputFileName
|
| 9 |
-
self.filename = inputFileName
|
| 10 |
self.seqTrans = SeqTranslate(casper_info_path)
|
| 11 |
self.logger = logging.getLogger(__name__)
|
| 12 |
-
self._line_buffer = [] # Pre-allocate buffer for lines
|
| 13 |
self._cached_results = {}
|
|
|
|
| 14 |
|
| 15 |
-
def
|
| 16 |
-
"""
|
| 17 |
try:
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
target_ranges.append((start, end, t['feature_name']))
|
| 24 |
-
target_ranges.sort() # Sort by start position
|
| 25 |
-
|
| 26 |
-
# Pre-allocate results list
|
| 27 |
-
results = []
|
| 28 |
-
results_append = results.append
|
| 29 |
|
| 30 |
-
# Read file in binary mode for speed
|
| 31 |
with open(self.fileName, 'rb') as f:
|
| 32 |
-
# Skip header
|
| 33 |
for _ in range(3):
|
| 34 |
f.readline()
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
| 38 |
for line in f:
|
| 39 |
-
if b'>'
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
#
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
try:
|
| 62 |
-
pos =
|
| 63 |
-
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
# Skip if position is past current range
|
| 69 |
-
if abs_pos >= end:
|
| 70 |
-
current_range_idx += 1
|
| 71 |
-
continue
|
| 72 |
-
|
| 73 |
-
# Check if position is in range
|
| 74 |
-
if start <= abs_pos < end:
|
| 75 |
-
sequence = parts[1].decode()
|
| 76 |
-
pam = sequence[-3:]
|
| 77 |
-
target_seq = sequence[:-3]
|
| 78 |
-
|
| 79 |
-
results_append({
|
| 80 |
'feature_name': feature_name,
|
| 81 |
-
'
|
| 82 |
-
'
|
| 83 |
-
'
|
| 84 |
-
'
|
| 85 |
-
'
|
|
|
|
| 86 |
'strand': "-" if pos < 0 else "+",
|
| 87 |
-
'score': float(parts[3])
|
| 88 |
'endonuclease': endonuclease
|
| 89 |
})
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
return results
|
| 95 |
-
|
| 96 |
except Exception as e:
|
| 97 |
self.logger.error(f"Error in read_targets_batch: {str(e)}")
|
|
|
|
| 98 |
return []
|
| 99 |
-
|
| 100 |
def parse_targets(self, file_path, region):
|
| 101 |
"""Parse targets with parallel processing and caching"""
|
| 102 |
cache_key = f"{file_path}:{region}"
|
|
@@ -127,3 +219,4 @@ class CSPRparser:
|
|
| 127 |
chunk_end = chunk_start + chunk_size if i < cpu_count()-1 else end
|
| 128 |
chunks.append((chunk_start, chunk_end))
|
| 129 |
return chunks
|
|
|
|
|
|
| 2 |
import logging
|
| 3 |
from multiprocessing import Pool, cpu_count
|
| 4 |
from functools import partial
|
| 5 |
+
import time
|
| 6 |
+
import pickle
|
| 7 |
+
import os
|
| 8 |
+
import traceback
|
| 9 |
|
| 10 |
class CSPRparser:
|
| 11 |
def __init__(self, inputFileName, casper_info_path):
    """Remember which CSPR file to read and set up the helpers used while reading it.

    Args:
        inputFileName: Path of the CSPR file; the index file path is this
            path with ".index" appended.
        casper_info_path: Passed unchanged to ``SeqTranslate``.
    """
    self.logger = logging.getLogger(__name__)
    self._cached_results = {}
    self.fileName = inputFileName
    self.index_file = f"{inputFileName}.index"
    self.seqTrans = SeqTranslate(casper_info_path)
|
| 17 |
|
| 18 |
+
def _create_index(self):
    """Create an index file for faster searching.

    The CSPR file is read in binary mode.  The first three lines are skipped
    as header; every following line starting with ``>`` opens a new
    chromosome whose id is the first whitespace-separated token without the
    ``>``.  Each other non-blank line whose text before the first comma
    parses as an integer is stored as ``(abs(position), raw_line)`` under the
    current chromosome.  Lines that do not parse, or that appear before the
    first ``>`` header, are ignored.

    Every chromosome header gets an entry, even when it has no target lines:
    ``read_targets_batch`` selects a chromosome by its ordinal position in
    this index, so dropping an empty one would shift all later chromosomes.

    Returns:
        bool: True once the index is pickled to ``self.index_file`` and kept
        in ``self._index``; False if anything raised (the error is logged).
    """
    try:
        start_time = time.time()
        self.logger.debug("Creating CSPR index file...")

        index_data = {}
        chrom_data = None  # target list of the chromosome being read; None before the first header

        with open(self.fileName, 'rb') as f:
            # Skip header lines
            for _ in range(3):
                f.readline()

            for line in f:
                if line.startswith(b'>'):
                    # Register the chromosome right away so it keeps its
                    # ordinal slot even if no target line follows.
                    current_chrom = line.decode().split()[0][1:]  # Remove '>' and get chromosome id
                    chrom_data = []
                    index_data[current_chrom] = chrom_data
                    continue

                if chrom_data is None or not line.strip():
                    continue

                first_comma = line.find(b',')
                if first_comma == -1:
                    continue

                try:
                    pos = int(line[:first_comma])
                except ValueError:
                    continue  # not a target line

                chrom_data.append((abs(pos), line))

        # Save index to file
        with open(self.index_file, 'wb') as f:
            pickle.dump(index_data, f)

        self._index = index_data

        create_time = time.time() - start_time
        self.logger.debug(f"Index creation time: {create_time:.2f} seconds")
        return True

    except Exception as e:
        self.logger.error(f"Error creating index: {str(e)}")
        return False
|
| 77 |
+
|
| 78 |
+
def _load_index(self):
    """Load the pickled CSPR index when it exists and is not older than the CSPR file.

    Returns:
        bool: True when ``self._index`` was populated from ``self.index_file``;
        False when the index file is missing, is older than ``self.fileName``,
        or could not be read (the error is logged).
    """
    try:
        index_path = self.index_file
        usable = (
            os.path.exists(index_path)
            and os.path.getmtime(index_path) >= os.path.getmtime(self.fileName)
        )
        if not usable:
            return False

        with open(index_path, 'rb') as handle:
            self._index = pickle.load(handle)
        return True

    except Exception as e:
        self.logger.error(f"Error loading index: {str(e)}")
        return False
|
| 93 |
+
|
| 94 |
+
def read_targets_batch(self, chromosome, targets, endonuclease):
    """Collect the indexed CSPR entries that fall inside each requested range.

    The index is loaded (or created) on first use.  The chromosome is chosen
    by ordinal: ``chromosome`` is converted to ``int`` and the N-th key of
    the index, in iteration order and counting from 1, is used.  For every
    target, entries whose absolute position lies in ``[start, end)`` are
    reported.  Entries are assumed to be ordered by absolute position, which
    lets the first candidate be located by binary search.

    Args:
        chromosome: 1-based ordinal of the chromosome (anything ``int()`` accepts).
        targets: Iterable of dicts with ``'start'`` and ``'end'`` and,
            optionally, ``'feature_id'`` / ``'feature_name'``.
        endonuclease: Value copied into every result.

    Returns:
        list[dict]: One dict per matching entry with the keys
        ``feature_name``, ``feature_id``, ``chromosome``, ``position``,
        ``location``, ``sequence``, ``pam``, ``strand``, ``score`` and
        ``endonuclease``.  An empty list is returned when there are no
        targets, the chromosome is not in the index, or an error occurred
        (the error is logged with its stack trace).
    """
    import bisect

    try:
        start_time = time.time()

        # Nothing requested: answer immediately instead of failing on
        # sorted_targets[0] below and logging a spurious error.
        sorted_targets = sorted(targets, key=lambda x: x['start'])
        if not sorted_targets:
            return []

        # Load or create index
        if not hasattr(self, '_index'):
            if not self._load_index():
                self._create_index()

        min_start = sorted_targets[0]['start']
        max_end = max(t['end'] for t in sorted_targets)

        self.logger.debug(f"Processing targets from {min_start} to {max_end}")
        self.logger.debug(f"Looking for chromosome number: {chromosome}")

        results = []
        lines_processed = 0
        lines_skipped = 0

        target_chrom_num = int(chromosome)  # Convert chromosome to integer

        # Debug available chromosomes
        self.logger.debug(f"Available chromosomes: {list(self._index.keys())}")

        # Pick the chromosome by its ordinal position in the index.
        found_chrom = None
        for ordinal, chrom_id in enumerate(self._index, start=1):
            if ordinal == target_chrom_num:
                found_chrom = chrom_id
                # Decode bytes to string if necessary
                chrom_str = chrom_id.decode() if isinstance(chrom_id, bytes) else chrom_id
                self.logger.debug(f"Found matching chromosome: {chrom_str}")
                break

        if found_chrom is not None:
            chrom_data = self._index[found_chrom]
            end_idx = len(chrom_data)
            start_idx = 0

            for target in sorted_targets:
                target_start = target['start']
                target_end = target['end']
                feature_id = target.get('feature_id', '')
                feature_name = target.get('feature_name', '')

                # Entries are (abs_pos, line) tuples, so the 1-tuple
                # (target_start,) sorts just before the first entry at or
                # after target_start.  Targets are sorted by start, so the
                # search never has to look left of the previous answer.
                start_idx = bisect.bisect_left(chrom_data, (target_start,), start_idx, end_idx)

                current_idx = start_idx
                while current_idx < end_idx and chrom_data[current_idx][0] < target_end:
                    try:
                        _, line = chrom_data[current_idx]
                        parts = line.split(b',')

                        if len(parts) >= 4:
                            pos = int(parts[0])
                            results.append({
                                'feature_name': feature_name,
                                'feature_id': feature_id,
                                'chromosome': found_chrom,
                                'position': abs(pos),
                                'location': f"{abs(pos)}-{abs(pos) + 23}",
                                'sequence': parts[1].decode(),
                                'pam': parts[2].decode(),
                                'strand': "-" if pos < 0 else "+",
                                'score': float(parts[3]),
                                'endonuclease': endonuclease
                            })
                            lines_processed += 1

                    except (ValueError, IndexError) as e:
                        self.logger.error(f"Error processing line: {str(e)}")
                        lines_skipped += 1

                    current_idx += 1
        else:
            self.logger.error(f"Chromosome {chromosome} not found in index")
            self.logger.debug(f"Available chromosomes: {list(self._index.keys())}")

        total_time = time.time() - start_time
        self.logger.debug(f"Processed {lines_processed} lines, skipped {lines_skipped}")
        self.logger.debug(f"Found {len(results)} targets in {total_time:.2f} seconds")

        return results

    except Exception as e:
        self.logger.error(f"Error in read_targets_batch: {str(e)}")
        self.logger.error(f"Stack trace: {traceback.format_exc()}")
        return []
|
|
|
|
| 192 |
def parse_targets(self, file_path, region):
|
| 193 |
"""Parse targets with parallel processing and caching"""
|
| 194 |
cache_key = f"{file_path}:{region}"
|
|
|
|
| 219 |
chunk_end = chunk_start + chunk_size if i < cpu_count()-1 else end
|
| 220 |
chunks.append((chunk_start, chunk_end))
|
| 221 |
return chunks
|
| 222 |
+
|
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
from models.HomeWindowModel import HomeWindowModel
|
| 2 |
from models.CSPRparser import CSPRparser
|
| 3 |
from models.AnnotationParser import AnnotationParser
|
|
@@ -26,16 +27,24 @@ class FindTargetsModel(HomeWindowModel):
|
|
| 26 |
def find_targets(self, input_data):
|
| 27 |
self.global_settings.logger.debug(f"Received input data: {input_data}")
|
| 28 |
|
|
|
|
|
|
|
| 29 |
organism = input_data['organism']
|
| 30 |
endo = input_data['endonuclease']
|
| 31 |
org_files = self.get_organism_to_files()
|
| 32 |
|
| 33 |
# Validate input data
|
|
|
|
| 34 |
self._validate_input(organism, endo, org_files)
|
|
|
|
|
|
|
| 35 |
|
| 36 |
# Get file path and parser
|
|
|
|
| 37 |
file_path = os.path.join(self.global_settings.get_db_path(), org_files[organism][endo][0])
|
| 38 |
parser = self._get_parser(file_path)
|
|
|
|
|
|
|
| 39 |
|
| 40 |
# Use dictionary for faster lookup
|
| 41 |
search_types = {
|
|
@@ -50,7 +59,15 @@ class FindTargetsModel(HomeWindowModel):
|
|
| 50 |
self.global_settings.logger.error(error_msg)
|
| 51 |
raise ValueError(error_msg)
|
| 52 |
|
|
|
|
|
|
|
| 53 |
self.results = search_func(parser, input_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
return self.results
|
| 55 |
|
| 56 |
def _validate_input(self, organism, endo, org_files):
|
|
@@ -66,85 +83,78 @@ class FindTargetsModel(HomeWindowModel):
|
|
| 66 |
raise ValueError(error_msg)
|
| 67 |
|
| 68 |
def find_targets_by_feature(self, parser, input_data):
|
| 69 |
-
|
| 70 |
-
annotation_file = (input_data.get('annotation_file') or
|
| 71 |
-
self.global_settings.get_current_annotation_file())
|
| 72 |
-
|
| 73 |
-
search_query = input_data['search_query'].strip().lower()
|
| 74 |
-
|
| 75 |
-
# Create new annotation parser instance for each search
|
| 76 |
-
annotation_parser = AnnotationParser(self.global_settings)
|
| 77 |
-
annotation_file_path = os.path.join(self.global_settings.get_db_path(), 'GBFF', annotation_file)
|
| 78 |
-
annotation_parser.set_annotation_file(annotation_file_path)
|
| 79 |
-
|
| 80 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
results_list = annotation_parser.genbank_search([search_query])
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
if feature.type in ['CDS']:
|
| 94 |
-
feature_info = self._get_feature_info(feature)
|
| 95 |
|
| 96 |
-
#
|
| 97 |
-
|
| 98 |
-
feature_info['feature_name'].lower(),
|
| 99 |
-
feature_info['feature_id'].lower(),
|
| 100 |
-
feature_info['feature_description'].lower()
|
| 101 |
-
])
|
| 102 |
|
| 103 |
-
#
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
if
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
if key in feature.qualifiers:
|
| 140 |
-
return feature.qualifiers[key][0]
|
| 141 |
-
return "N/A"
|
| 142 |
-
|
| 143 |
-
def _get_feature_description(self, feature):
|
| 144 |
-
for key in ['product', 'note']:
|
| 145 |
-
if key in feature.qualifiers:
|
| 146 |
-
return feature.qualifiers[key][0]
|
| 147 |
-
return "N/A"
|
| 148 |
|
| 149 |
def find_targets_by_position(self, parser, input_data):
|
| 150 |
search_query = input_data['search_query']
|
|
|
|
| 1 |
+
import time
|
| 2 |
from models.HomeWindowModel import HomeWindowModel
|
| 3 |
from models.CSPRparser import CSPRparser
|
| 4 |
from models.AnnotationParser import AnnotationParser
|
|
|
|
| 27 |
def find_targets(self, input_data):
|
| 28 |
self.global_settings.logger.debug(f"Received input data: {input_data}")
|
| 29 |
|
| 30 |
+
start_time = time.time()
|
| 31 |
+
|
| 32 |
organism = input_data['organism']
|
| 33 |
endo = input_data['endonuclease']
|
| 34 |
org_files = self.get_organism_to_files()
|
| 35 |
|
| 36 |
# Validate input data
|
| 37 |
+
validate_start = time.time()
|
| 38 |
self._validate_input(organism, endo, org_files)
|
| 39 |
+
validate_time = time.time() - validate_start
|
| 40 |
+
self.global_settings.logger.debug(f"Validation time: {validate_time:.2f} seconds")
|
| 41 |
|
| 42 |
# Get file path and parser
|
| 43 |
+
parser_start = time.time()
|
| 44 |
file_path = os.path.join(self.global_settings.get_db_path(), org_files[organism][endo][0])
|
| 45 |
parser = self._get_parser(file_path)
|
| 46 |
+
parser_time = time.time() - parser_start
|
| 47 |
+
self.global_settings.logger.debug(f"Parser initialization time: {parser_time:.2f} seconds")
|
| 48 |
|
| 49 |
# Use dictionary for faster lookup
|
| 50 |
search_types = {
|
|
|
|
| 59 |
self.global_settings.logger.error(error_msg)
|
| 60 |
raise ValueError(error_msg)
|
| 61 |
|
| 62 |
+
# Perform the search
|
| 63 |
+
search_start = time.time()
|
| 64 |
self.results = search_func(parser, input_data)
|
| 65 |
+
search_time = time.time() - search_start
|
| 66 |
+
self.global_settings.logger.debug(f"Search execution time: {search_time:.2f} seconds")
|
| 67 |
+
|
| 68 |
+
total_time = time.time() - start_time
|
| 69 |
+
self.global_settings.logger.debug(f"Total find_targets time: {total_time:.2f} seconds")
|
| 70 |
+
|
| 71 |
return self.results
|
| 72 |
|
| 73 |
def _validate_input(self, organism, endo, org_files):
|
|
|
|
| 83 |
raise ValueError(error_msg)
|
| 84 |
|
| 85 |
def find_targets_by_feature(self, parser, input_data):
    """Search for features using the indexed annotation parser.

    A fresh ``AnnotationParser`` is pointed at the annotation file (taken
    from ``input_data['annotation_file']`` or, failing that, from the global
    settings) inside the ``GBFF`` folder of the database path, and the
    stripped search query is handed to its ``genbank_search``.  Every hit is
    turned into a target description.

    Args:
        parser: Not used by this method.
        input_data: Dict providing ``'search_query'`` and ``'endonuclease'``
            and, optionally, ``'annotation_file'``.

    Returns:
        list[dict]: One dict per hit with the keys ``feature_type``,
        ``chromosome``, ``full_chromosome``, ``feature_id``, ``feature_name``,
        ``feature_description``, ``location``, ``start``, ``end``, ``strand``
        and ``endonuclease``.  Hits whose location text contains no
        ``start:end`` pair are skipped with a warning instead of aborting
        the whole search.

    Raises:
        Exception: Any other error is logged and re-raised unchanged.
    """
    import re

    # One "start:end" pair, tolerating '<' / '>' markers and spaces around ':'.
    span_pattern = re.compile(r'[<>]?(\d+)\s*:\s*[<>]?(\d+)')

    try:
        start_time = time.time()

        # Get annotation file from input data or global settings
        annotation_file = (input_data.get('annotation_file') or
                           self.global_settings.get_current_annotation_file())

        search_query = input_data['search_query'].strip()

        # Create new annotation parser instance
        parser_start = time.time()
        annotation_parser = AnnotationParser(self.global_settings)
        annotation_file_path = os.path.join(self.global_settings.get_db_path(), 'GBFF', annotation_file)
        annotation_parser.set_annotation_file(annotation_file_path)
        parser_time = time.time() - parser_start
        self.global_settings.logger.debug(f"Annotation parser initialization time: {parser_time:.2f} seconds")

        # Use indexed search
        search_start = time.time()
        results_list = annotation_parser.genbank_search([search_query])
        search_time = time.time() - search_start
        self.global_settings.logger.debug(f"Genbank search time: {search_time:.2f} seconds")

        # Format results
        format_start = time.time()
        formatted_results = []
        for record_id, feature_info in results_list:
            # Extract start and end from feature_location.  A location made
            # of several parts is reduced to its overall span; a location
            # with no readable pair only costs this one hit.
            location = feature_info['feature_location']
            spans = span_pattern.findall(str(location))
            if not spans:
                self.global_settings.logger.warning(
                    f"Skipping feature {feature_info['feature_id']}: cannot read location '{location}'")
                continue
            start = min(int(first) for first, _ in spans)
            end = max(int(last) for _, last in spans)

            # Extract chromosome number from record_id (e.g., "NZ_CP132594.1" -> "1")
            # NOTE(review): this is the text after the last '.', which in the
            # example above is the accession's version suffix - confirm that
            # this is really the chromosome ordinal the consumers expect.
            chrom_num = record_id.split('.')[-1] if '.' in record_id else '1'

            # Create target info with feature_id and chromosome number
            target_info = {
                'feature_type': 'CDS',
                'chromosome': chrom_num,  # Use chromosome number
                'full_chromosome': record_id,  # Store full chromosome name for reference
                'feature_id': feature_info['feature_id'],
                'feature_name': feature_info['feature_name'],
                'feature_description': feature_info['feature_description'],
                'location': f"{start}-{end}",
                'start': start,
                'end': end,
                'strand': '+' if '(+)' in location else '-',
                'endonuclease': input_data['endonuclease']
            }

            # Debug log the target info
            self.global_settings.logger.debug(f"Created target info: {target_info}")

            formatted_results.append(target_info)

        format_time = time.time() - format_start
        self.global_settings.logger.debug(f"Result formatting time: {format_time:.2f} seconds")

        # Debug log sample results
        if formatted_results:
            self.global_settings.logger.debug(f"Sample formatted result: {formatted_results[0]}")
            self.global_settings.logger.debug(f"Feature IDs present: {[r['feature_id'] for r in formatted_results[:5]]}")

        total_time = time.time() - start_time
        self.global_settings.logger.debug(f"Total find_targets_by_feature time: {total_time:.2f} seconds")

        return formatted_results

    except Exception as e:
        self.global_settings.logger.error(f"Error in find_targets_by_feature: {str(e)}")
        raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
def find_targets_by_position(self, parser, input_data):
|
| 160 |
search_query = input_data['search_query']
|
|
@@ -285,5 +285,10 @@ class GlobalSettings(QObject):
|
|
| 285 |
self._current_annotation_file = self._current_home_window.get_annotation_file()
|
| 286 |
return self._current_annotation_file
|
| 287 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
# Global instance
|
| 289 |
global_settings = None
|
|
|
|
| 285 |
self._current_annotation_file = self._current_home_window.get_annotation_file()
|
| 286 |
return self._current_annotation_file
|
| 287 |
|
| 288 |
+
def get_scoring_options_window(self, view_targets_controller):
    """Build a ScoringOptionsController wired to these settings.

    Args:
        view_targets_controller: Forwarded unchanged as the second
            constructor argument of the new controller.

    Returns:
        A freshly constructed ScoringOptionsController.
    """
    # Imported at call time instead of at module load.
    # NOTE(review): presumably to avoid a circular import between the
    # settings module and the controllers package -- confirm.
    from controllers.ScoringOptionsController import ScoringOptionsController

    controller = ScoringOptionsController(self, view_targets_controller)
    return controller
|
| 292 |
+
|
| 293 |
# Global instance
|
| 294 |
global_settings = None
|
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from Bio import SeqIO
|
| 3 |
+
import traceback
|
| 4 |
+
import warnings
|
| 5 |
+
import contextlib
|
| 6 |
+
import sys
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
class ScoringOptionsModel:
    """Hold one FASTA record and score guide sequences against it.

    ``load_fasta`` stores the selected record (upper-cased) and its reverse
    complement; ``score_sequences`` looks each guide up in the matching
    strand, cuts a fixed-size context window around it and hands the
    windows to the scoring algorithm.
    """

    # The context window runs from 4 characters before the match start to
    # 26 characters after it (30 characters in total).
    # NOTE(review): presumably the 30-mer layout Azimuth expects -- confirm.
    _UPSTREAM_FLANK = 4
    _WINDOW_END_OFFSET = 26

    def __init__(self, global_settings):
        """Store the settings object and start with no genome loaded."""
        self.global_settings = global_settings
        self.logger = global_settings.get_logger()
        self.genome = ""
        self.rev_genome = ""
        self.fasta_path = ""

    def load_fasta(self, fasta_path, chromosome):
        """Load the FASTA record selected by ``chromosome``.

        Args:
            fasta_path: Path of the FASTA file to read.
            chromosome: Identifier whose text after the last ``.`` is used
                as the 1-based index of the record to load; an identifier
                without a ``.`` selects the first record.
                NOTE(review): for accessions such as "NZ_CP032679.1" that
                suffix looks like a version number -- confirm with callers.

        Returns:
            True when the record was found and stored, False when it is
            absent from the file or any error occurred.
        """
        try:
            self.fasta_path = fasta_path
            # Drop any previously loaded record first so a failed load can
            # never leave a different chromosome available for scoring.
            self.genome = ""
            self.rev_genome = ""

            # Extract chromosome number from ID (e.g., "NZ_CP032679.1" -> "1")
            chrom_num = int(chromosome.split('.')[-1]) if '.' in chromosome else 1
            self.logger.debug(f"Looking for chromosome number: {chrom_num}")

            # Load only the required chromosome (records are counted from 1)
            for index, record in enumerate(SeqIO.parse(fasta_path, "fasta"), start=1):
                if index == chrom_num:
                    self.genome = str(record.seq).upper()
                    self.rev_genome = str(record.seq.reverse_complement()).upper()
                    self.logger.debug(f"Loaded chromosome {chrom_num} sequence of length {len(self.genome)}")
                    return True

            # Previously this fell through and reported success.
            self.logger.error(f"Chromosome {chrom_num} not found in FASTA file: {fasta_path}")
            return False

        except Exception as e:
            self.logger.error(f"Error loading FASTA file: {str(e)}")
            self.logger.error(f"Stack trace: {traceback.format_exc()}")
            return False

    @classmethod
    def _extract_window(cls, reference, sequence):
        """Return the context window around ``sequence`` or None if it cannot be cut."""
        # Searching from the flank offset guarantees the window start is
        # never negative (a negative slice start would wrap around).
        pos = reference.find(sequence, cls._UPSTREAM_FLANK)
        if pos == -1:
            return None
        end = pos + cls._WINDOW_END_OFFSET
        if end > len(reference):
            # The window would be silently truncated by slicing.
            return None
        return reference[pos - cls._UPSTREAM_FLANK:end]

    def score_sequences(self, targets, algorithm="Azimuth 2.0"):
        """Score the target sequences using the specified algorithm.

        Args:
            targets: Iterable of dicts providing 'strand', 'sequence' and
                'pam'. Strand "+" is searched in the loaded genome, any
                other value in its reverse complement.
            algorithm: Name of the scoring algorithm; only "Azimuth 2.0"
                is recognised.

        Returns:
            ``(scores, reject_list, guide_list)``. ``guide_list`` holds
            sequence+PAM for every target; ``reject_list`` holds the
            indices of targets for which no full context window could be
            cut (not found, or too close to either end); ``scores`` has a
            -1 at each rejected index, or is None when every target was
            rejected. On any error, including an unknown algorithm,
            ``(None, [], [])`` is returned.
        """
        try:
            guide_list = []
            reject_list = []
            full_seqs = []

            # Process each target
            for i, target in enumerate(targets):
                strand = target['strand']
                sequence = target['sequence'] + target['pam']
                guide_list.append(sequence)

                reference = self.genome if strand == "+" else self.rev_genome
                window = self._extract_window(reference, sequence)
                if window is None:
                    reject_list.append(i)
                else:
                    full_seqs.append(window)

            if not full_seqs:
                return None, reject_list, guide_list

            full_seqs = np.array(full_seqs)

            # Score using selected algorithm
            if algorithm == "Azimuth 2.0":
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    # Add utils directory to Python path so the azimuth
                    # package under it can be imported
                    utils_path = os.path.join(self.global_settings.get_src_dir_path(), 'utils')
                    if utils_path not in sys.path:
                        sys.path.append(utils_path)

                    from azimuth import model_comparison as az
                    scores = az.predict(full_seqs) * 100
            else:
                raise ValueError(f"Unknown algorithm: {algorithm}")

            # Insert -1 scores for rejected sequences. reject_list is in
            # ascending order, so each index is already the final position
            # once the earlier placeholders have been inserted.
            for i in reject_list:
                scores = np.insert(scores, i, -1)

            return scores, reject_list, guide_list

        except Exception as e:
            self.logger.error(f"Error scoring sequences: {str(e)}")
            self.logger.error(f"Stack trace: {traceback.format_exc()}")
            return None, [], []
|
|
@@ -40,6 +40,17 @@ class ViewTargetsModel(HomeWindowModel):
|
|
| 40 |
self._chromosome_seqs = {}
|
| 41 |
self._cached_targets = {} # Add cache for targets
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
def cleanup(self):
|
| 44 |
"""Cleanup method to be called when the view is closed"""
|
| 45 |
try:
|
|
@@ -59,7 +70,7 @@ class ViewTargetsModel(HomeWindowModel):
|
|
| 59 |
def _on_annotation_file_changed(self, new_annotation_file):
|
| 60 |
"""Clear all caches when annotation file changes"""
|
| 61 |
try:
|
| 62 |
-
self.
|
| 63 |
self._gene_data_cache.clear()
|
| 64 |
self._sequence_cache.clear()
|
| 65 |
self._parser_cache.clear()
|
|
@@ -77,98 +88,91 @@ class ViewTargetsModel(HomeWindowModel):
|
|
| 77 |
self._chromosome_seqs = {}
|
| 78 |
|
| 79 |
except Exception as e:
|
| 80 |
-
self.
|
| 81 |
|
| 82 |
def load_targets(self, selected_targets, organism, endonuclease):
|
| 83 |
"""Fast target loading with minimal file operations"""
|
| 84 |
-
|
| 85 |
|
| 86 |
try:
|
| 87 |
-
self.
|
| 88 |
|
| 89 |
# Store organism and endonuclease for potential reloading
|
| 90 |
self.organism = organism
|
| 91 |
self.endonuclease = endonuclease
|
| 92 |
-
|
| 93 |
# Get CSPR parser from cache or create new one
|
| 94 |
parser_start = time.time()
|
| 95 |
cspr_key = f"{organism}_{endonuclease}"
|
| 96 |
if cspr_key in self._parser_cache:
|
| 97 |
self.cspr_parser = self._parser_cache[cspr_key]
|
|
|
|
| 98 |
else:
|
| 99 |
org_files = self.get_organism_to_files()
|
| 100 |
if organism not in org_files or endonuclease not in org_files[organism]:
|
| 101 |
-
self.
|
| 102 |
return
|
| 103 |
|
| 104 |
cspr_file = org_files[organism][endonuclease][0]
|
| 105 |
cspr_path = os.path.join(self.global_settings.get_db_path(), cspr_file)
|
| 106 |
self.cspr_parser = CSPRparser(cspr_path, self.global_settings.get_casper_info_path())
|
| 107 |
self._parser_cache[cspr_key] = self.cspr_parser
|
|
|
|
| 108 |
parser_time = time.time() - parser_start
|
|
|
|
| 109 |
|
| 110 |
# Initialize targets and genes
|
|
|
|
| 111 |
self.targets = []
|
| 112 |
self.available_genes = set()
|
|
|
|
|
|
|
| 113 |
|
| 114 |
-
#
|
| 115 |
-
|
| 116 |
-
annotation_start = time.time()
|
| 117 |
-
self.annotation_parser = AnnotationParser(self.global_settings)
|
| 118 |
-
annotation_files = self.get_annotation_files()
|
| 119 |
-
if annotation_files:
|
| 120 |
-
self.annotation_path = os.path.join(self.global_settings.get_db_path(), 'GBFF', annotation_files[0])
|
| 121 |
-
self.annotation_parser.set_annotation_file(self.annotation_path)
|
| 122 |
-
annotation_time = time.time() - annotation_start
|
| 123 |
-
else:
|
| 124 |
-
annotation_time = 0
|
| 125 |
-
|
| 126 |
-
# Process targets in batches by chromosome
|
| 127 |
-
processing_start = time.time()
|
| 128 |
-
|
| 129 |
-
# Group targets by chromosome and prepare batch reading
|
| 130 |
batch_targets = defaultdict(list)
|
| 131 |
for target in selected_targets:
|
| 132 |
chrom = target['chromosome']
|
| 133 |
start, end = map(int, target['location'].split('-'))
|
| 134 |
batch_targets[chrom].append({
|
| 135 |
'feature_name': target['feature_name'],
|
|
|
|
| 136 |
'start': start,
|
| 137 |
'end': end
|
| 138 |
})
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
-
#
|
|
|
|
| 142 |
target_count = 0
|
| 143 |
for chrom, targets in batch_targets.items():
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
#
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
)
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
processing_time = time.time() - processing_start
|
| 161 |
|
| 162 |
-
|
| 163 |
-
self.
|
|
|
|
| 164 |
|
| 165 |
-
total_time = time.time() - start_time
|
| 166 |
-
self.global_settings.logger.debug(f"Total load_targets execution time: {total_time:.2f} seconds")
|
| 167 |
-
self.global_settings.logger.debug(f"Found {target_count} total CSPR targets")
|
| 168 |
-
|
| 169 |
except Exception as e:
|
| 170 |
-
self.
|
| 171 |
-
|
| 172 |
|
| 173 |
def _get_chromosome_sequence(self, chromosome):
|
| 174 |
"""Get chromosome sequence on demand"""
|
|
@@ -190,118 +194,60 @@ class ViewTargetsModel(HomeWindowModel):
|
|
| 190 |
if self.annotation_path:
|
| 191 |
self.annotation_parser.set_annotation_file(self.annotation_path)
|
| 192 |
|
| 193 |
-
def get_gene_data(self,
|
| 194 |
-
"""Get gene data with
|
| 195 |
try:
|
| 196 |
-
if not
|
| 197 |
-
self.
|
| 198 |
return None
|
| 199 |
|
| 200 |
# Check model cache first
|
| 201 |
-
if
|
| 202 |
-
return self._gene_data_cache[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
-
#
|
| 205 |
-
|
| 206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
|
| 208 |
-
# Get gene data from parser
|
| 209 |
-
gene_data = self.annotation_parser.get_gene_data(gene_name)
|
| 210 |
if gene_data:
|
| 211 |
-
self._gene_data_cache[
|
|
|
|
|
|
|
|
|
|
| 212 |
|
| 213 |
return gene_data
|
| 214 |
|
| 215 |
except Exception as e:
|
| 216 |
-
self.
|
|
|
|
| 217 |
return None
|
| 218 |
|
| 219 |
def get_targets(self):
|
|
|
|
| 220 |
return self.targets
|
| 221 |
|
| 222 |
-
def highlight_targets_in_gene_viewer(self, selected_targets):
    """Wrap each selected target found in the extended sequence in a coloured span.

    Args:
        selected_targets: Iterable of dicts providing 'sequence' and
            'strand'. Targets on strand '-' are reverse-complemented
            before the search and highlighted red; all others green.

    Returns:
        The extended sequence with ``<span>`` markup around the first
        occurrence of every target that was found. The unmodified
        extended sequence is returned when it is empty, when nothing
        matched, or when an error occurred (None if the attribute is
        missing altogether).
    """
    logger = self.global_settings.logger
    # Resolved before the try block so the error path always has a value
    # to return.
    sequence = getattr(self, 'extended_sequence', None)
    try:
        logger.debug("Starting highlight_targets_in_gene_viewer")
        if not sequence:
            logger.error("No extended sequence available")
            return sequence

        logger.debug(f"Extended sequence length: {len(sequence)}")

        # Case-insensitive search; upper-case the haystack once, not per target
        sequence_upper = sequence.upper()

        highlights = []
        for target in selected_targets:
            logger.debug(f"Processing target: {target}")
            sequence_to_find = target['sequence']
            strand = target['strand']

            # For negative strand, we need to use reverse complement
            if strand == '-':
                sequence_to_find = str(Seq(sequence_to_find).reverse_complement())
                logger.debug(f"Reverse complemented sequence: {sequence_to_find}")

            target_upper = sequence_to_find.upper()
            logger.debug(f"Searching for sequence: {target_upper}")

            # Only the first occurrence is highlighted
            pos = sequence_upper.find(target_upper)
            if pos != -1:
                logger.debug(f"Found sequence at position: {pos}")
                color = 'red' if strand == '-' else 'green'
                highlights.append((pos, len(sequence_to_find), color))
            else:
                logger.warning(f"Sequence not found: {target_upper}")

        if not highlights:
            logger.error("No sequences could be highlighted")
            return sequence

        logger.debug(f"Found {len(highlights)} sequences to highlight")

        # The markup is built left to right, so hits must be in position
        # order; otherwise text between hits is dropped or duplicated.
        highlights.sort(key=lambda hit: hit[0])

        # Build highlighted sequence
        result = []
        last_pos = 0
        for pos, length, color in highlights:
            end = pos + length
            # Clip a hit that overlaps the previous one so no character is
            # emitted twice; skip it if it is fully covered already.
            start = max(pos, last_pos)
            if start >= end:
                continue
            result.append(sequence[last_pos:start])
            result.append(f"<span style='background-color: {color};'>")
            result.append(sequence[start:end])
            result.append("</span>")
            last_pos = end

        result.append(sequence[last_pos:])
        final_sequence = ''.join(result)

        logger.debug(f"Final highlighted sequence length: {len(final_sequence)}")
        return final_sequence

    except Exception as e:
        logger.error(f"Error highlighting targets: {str(e)}\n{traceback.format_exc()}")
        return sequence
|
| 285 |
-
|
| 286 |
def get_available_genes(self):
|
| 287 |
-
"""Get list of available genes
|
| 288 |
try:
|
| 289 |
-
# Return the available genes list that was populated during load_targets
|
| 290 |
if hasattr(self, 'available_genes'):
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
for target in self.targets:
|
| 296 |
-
if 'feature_name' in target:
|
| 297 |
-
genes.add(target['feature_name'])
|
| 298 |
-
|
| 299 |
-
# Store for future use
|
| 300 |
-
self.available_genes = sorted(list(genes))
|
| 301 |
-
return self.available_genes
|
| 302 |
-
|
| 303 |
except Exception as e:
|
| 304 |
-
self.
|
| 305 |
return []
|
| 306 |
|
| 307 |
# ... (other methods remain unchanged)
|
|
@@ -315,3 +261,90 @@ class ViewTargetsModel(HomeWindowModel):
|
|
| 315 |
except Exception as e:
|
| 316 |
logging.error(f"Error processing target: {e}")
|
| 317 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
self._chromosome_seqs = {}
|
| 41 |
self._cached_targets = {} # Add cache for targets
|
| 42 |
|
| 43 |
+
# Connect to annotation file changes
|
| 44 |
+
self.global_settings.annotation_file_changed.connect(self._on_annotation_file_changed)
|
| 45 |
+
|
| 46 |
+
# Initialize annotation path
|
| 47 |
+
self.annotation_path = os.path.join(
|
| 48 |
+
self.global_settings.get_db_path(),
|
| 49 |
+
'GBFF',
|
| 50 |
+
self.global_settings.get_current_annotation_file()
|
| 51 |
+
)
|
| 52 |
+
self.logger.debug(f"Initialized annotation path: {self.annotation_path}")
|
| 53 |
+
|
| 54 |
def cleanup(self):
|
| 55 |
"""Cleanup method to be called when the view is closed"""
|
| 56 |
try:
|
|
|
|
| 70 |
def _on_annotation_file_changed(self, new_annotation_file):
|
| 71 |
"""Clear all caches when annotation file changes"""
|
| 72 |
try:
|
| 73 |
+
self.logger.debug(f"ViewTargetsModel clearing caches for new annotation file: {new_annotation_file}")
|
| 74 |
self._gene_data_cache.clear()
|
| 75 |
self._sequence_cache.clear()
|
| 76 |
self._parser_cache.clear()
|
|
|
|
| 88 |
self._chromosome_seqs = {}
|
| 89 |
|
| 90 |
except Exception as e:
|
| 91 |
+
self.logger.error(f"Error in _on_annotation_file_changed: {str(e)}")
|
| 92 |
|
| 93 |
def load_targets(self, selected_targets, organism, endonuclease):
    """Load CSPR targets for the selected features into ``self.targets``.

    Args:
        selected_targets: Iterable of dicts providing 'chromosome',
            'location' (a "start-end" string), 'feature_name' and
            'feature_id'.
        organism: Organism name; with ``endonuclease`` it forms the parser
            cache key and selects the CSPR file.
        endonuclease: Endonuclease name, also forwarded to the parser.

    Side effects:
        Sets ``self.organism``, ``self.endonuclease``, ``self.cspr_parser``,
        ``self.targets`` and ``self.available_genes`` (a set of
        ``(feature_id, feature_name)`` tuples). Errors are logged, not
        raised; after a failure ``self.targets`` holds whatever was loaded
        before the error.

    Returns:
        None.
    """
    total_start = time.time()

    try:
        self.logger.debug(f"Starting load_targets with {len(selected_targets)} targets")

        # Store organism and endonuclease for potential reloading
        self.organism = organism
        self.endonuclease = endonuclease

        # Reset the results before anything can bail out: organism and
        # endonuclease already describe the new request, so leaving the
        # previous request's targets in place would expose stale data.
        init_start = time.time()
        self.targets = []
        self.available_genes = set()
        init_time = time.time() - init_start
        self.logger.debug(f"Initialization time: {init_time:.2f} seconds")

        # Get CSPR parser from cache or create new one
        parser_start = time.time()
        cspr_key = f"{organism}_{endonuclease}"
        if cspr_key in self._parser_cache:
            self.cspr_parser = self._parser_cache[cspr_key]
            self.logger.debug("Using cached CSPR parser")
        else:
            org_files = self.get_organism_to_files()
            if organism not in org_files or endonuclease not in org_files[organism]:
                self.logger.error(f"No CSPR file found for {organism} and {endonuclease}")
                return

            cspr_file = org_files[organism][endonuclease][0]
            cspr_path = os.path.join(self.global_settings.get_db_path(), cspr_file)
            self.cspr_parser = CSPRparser(cspr_path, self.global_settings.get_casper_info_path())
            self._parser_cache[cspr_key] = self.cspr_parser
            self.logger.debug("Created new CSPR parser")
        parser_time = time.time() - parser_start
        self.logger.debug(f"CSPR parser initialization time: {parser_time:.2f} seconds")

        # Group targets by chromosome so the parser is called once per chromosome
        group_start = time.time()
        batch_targets = defaultdict(list)
        for target in selected_targets:
            chrom = target['chromosome']
            start, end = map(int, target['location'].split('-'))
            batch_targets[chrom].append({
                'feature_name': target['feature_name'],
                'feature_id': target['feature_id'],  # Include feature_id (locus_tag)
                'start': start,
                'end': end
            })
            # Store both feature_id and feature_name
            self.available_genes.add((target['feature_id'], target['feature_name']))
        group_time = time.time() - group_start
        self.logger.debug(f"Target grouping time: {group_time:.2f} seconds")

        # Process targets by chromosome
        process_start = time.time()
        target_count = 0
        for chrom, targets in batch_targets.items():
            batch_start = time.time()
            results = self.cspr_parser.read_targets_batch(chrom, targets, endonuclease)
            # Tag each result with the feature_id of the first requested
            # feature that has the same name and whose range contains the
            # result position; unmatched results get no 'feature_id' key.
            for result in results:
                for target in targets:
                    if (target['start'] <= result['position'] <= target['end'] and
                            target['feature_name'] == result['feature_name']):
                        result['feature_id'] = target['feature_id']
                        break
            self.targets.extend(results)
            target_count += len(results)
            batch_time = time.time() - batch_start
            self.logger.debug(f"Chromosome {chrom} processing time: {batch_time:.2f} seconds")
        process_time = time.time() - process_start
        self.logger.debug(f"Total target processing time: {process_time:.2f} seconds")

        total_time = time.time() - total_start
        self.logger.debug(f"Total load_targets execution time: {total_time:.2f} seconds")
        self.logger.debug(f"Found {target_count} total CSPR targets")

    except Exception as e:
        self.logger.error(f"Error in load_targets: {str(e)}")
        self.logger.error(f"Stack trace: {traceback.format_exc()}")
|
| 176 |
|
| 177 |
def _get_chromosome_sequence(self, chromosome):
|
| 178 |
"""Get chromosome sequence on demand"""
|
|
|
|
| 194 |
if self.annotation_path:
|
| 195 |
self.annotation_parser.set_annotation_file(self.annotation_path)
|
| 196 |
|
| 197 |
+
def get_gene_data(self, locus_tag):
    """Look up annotation data for a locus tag, caching successful hits.

    Args:
        locus_tag: Locus tag to look up. Only ``str`` and ``int`` values
            are sent to the annotation parser; the parser is queried with
            the stripped, lower-cased text form.

    Returns:
        Whatever the annotation parser returns for the tag (cached under
        the original ``locus_tag`` when truthy), or None when the tag is
        empty, of another type, not found, or an error occurred.
    """
    try:
        if not locus_tag:
            self.logger.debug("No locus tag provided")
            return None

        # Check model cache first
        cache = self._gene_data_cache
        if locus_tag in cache:
            return cache[locus_tag]

        # Create the annotation parser lazily on first use
        if getattr(self, 'annotation_parser', None) is None:
            self.annotation_parser = AnnotationParser(self.global_settings)
            settings = self.global_settings
            annotation_path = os.path.join(
                settings.get_db_path(),
                'GBFF',
                settings.get_current_annotation_file(),
            )
            self.annotation_parser.set_annotation_file(annotation_path)
            self.logger.debug(f"Initialized annotation parser with file: {annotation_path}")

        # Query the parser with a normalised string form of the tag
        found = None
        if isinstance(locus_tag, (str, int)):
            locus_tag_str = str(locus_tag).strip()
            self.logger.debug(f"Searching for locus tag: {locus_tag_str}")
            # Look up by locus tag directly
            found = self.annotation_parser.get_gene_data(locus_tag_str.lower())

        if not found:
            self.logger.debug(f"No gene data found for locus tag: {locus_tag}")
            return found

        cache[locus_tag] = found
        self.logger.debug(f"Found gene data: {found.keys()}")
        return found

    except Exception as err:
        self.logger.error(f"Error getting gene data: {str(err)}")
        self.logger.error(f"Stack trace: {traceback.format_exc()}")
        return None
|
| 236 |
|
| 237 |
def get_targets(self):
    """Return the list currently stored in ``self.targets``.

    The same list object is returned, not a copy.
    """
    loaded_targets = self.targets
    return loaded_targets
|
| 240 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
def get_available_genes(self):
    """List the available genes as sorted 'feature_id: feature_name' strings.

    Returns:
        One string per ``(feature_id, feature_name)`` entry of
        ``self.available_genes``, in sorted tuple order. An empty list is
        returned when the attribute does not exist or an error occurs.
    """
    try:
        if not hasattr(self, 'available_genes'):
            return []

        # Format as "feature_id: feature_name"
        labels = []
        for feature_id, feature_name in sorted(self.available_genes):
            labels.append(f"{feature_id}: {feature_name}")
        return labels
    except Exception as err:
        self.logger.error(f"Error getting available genes: {str(err)}")
        return []
|
| 252 |
|
| 253 |
# ... (other methods remain unchanged)
|
|
|
|
| 261 |
except Exception as e:
|
| 262 |
logging.error(f"Error processing target: {e}")
|
| 263 |
return None
|
| 264 |
+
|
| 265 |
+
def get_gene_sequence(self, locus_tag):
    """Return the padded sequence for a locus tag, caching the result.

    The gene range is read from ``gene_data['info']['location']`` (format
    "start:end(strand)") and sliced out of ``gene_data['sequence']`` with
    up to 30 characters of context on each side. The gene itself is
    upper-cased and the padding lower-cased.

    Args:
        locus_tag: Locus tag passed on to ``get_gene_data``.

    Returns:
        A dict with 'sequence', 'chrom_length', 'start', 'end',
        'padded_start' and 'padded_end', or None when the gene data,
        its location or its sequence is unavailable or an error occurred.
    """
    try:
        # Check sequence cache first
        cache_key = f"{locus_tag}_sequence"
        if cache_key in self._sequence_cache:
            self.logger.debug(f"Cache hit for gene sequence: {locus_tag}")
            return self._sequence_cache[cache_key]

        # Get gene data which includes location information
        # (logged instead of printed so it follows the logger configuration)
        self.logger.debug(f"Getting gene data for locus tag: {locus_tag}")
        gene_data = self.get_gene_data(locus_tag)
        if not gene_data or 'info' not in gene_data:
            self.logger.warning(f"No gene data found for locus tag: {locus_tag}")
            return None

        # Parse location string (format: "start:end(strand)")
        location = gene_data['info']['location']
        if ':' not in location:
            self.logger.warning(f"Invalid location format: {location}")
            return None

        # Extract start and end positions
        location_parts = location.split(':')
        start = int(location_parts[0])
        end = int(location_parts[1].split('(')[0])

        if 'sequence' not in gene_data:
            self.logger.warning(f"No sequence available for locus tag: {locus_tag}")
            return None
        sequence = gene_data['sequence']

        # Add padding (30 bases on each side), clamped to the sequence bounds
        padding = 30
        seq_start = max(0, start - padding)
        seq_end = min(len(sequence), end + padding)

        # Get sequence with padding
        five_prime_pad = sequence[seq_start:start].lower() if seq_start < start else ""
        main_seq = sequence[start:end].upper()
        three_prime_pad = sequence[end:seq_end].lower() if end < seq_end else ""

        full_sequence = five_prime_pad + main_seq + three_prime_pad

        # Cache the result
        result = {
            'sequence': full_sequence,
            'chrom_length': len(sequence),
            'start': start,
            'end': end,
            'padded_start': seq_start,
            'padded_end': seq_end
        }
        self._sequence_cache[cache_key] = result

        self.logger.debug(f"Retrieved and cached sequence for locus tag {locus_tag} ({len(full_sequence)} bp)")
        return result

    except Exception as e:
        self.logger.error(f"Error getting gene sequence: {str(e)}")
        self.logger.error(f"Stack trace: {traceback.format_exc()}")
        return None
|
| 326 |
+
|
| 327 |
+
def get_scoring_options(self):
    """Return the current scoring options, creating defaults on first use.

    Returns:
        The dict stored in ``self.scoring_options``. When the attribute
        does not exist yet it is first set to the default options. An
        empty dict is returned if an error occurs.
    """
    try:
        if hasattr(self, 'scoring_options'):
            return self.scoring_options

        # First access: install the default options
        defaults = {}
        defaults['algorithm'] = 'Azimuth 2.0'
        defaults['fasta_file'] = ''
        defaults['min_score'] = 0
        defaults['max_score'] = 100
        self.scoring_options = defaults
        return self.scoring_options

    except Exception as err:
        self.logger.error(f"Error getting scoring options: {str(err)}")
        return {}
|
| 342 |
+
|
| 343 |
+
def set_scoring_options(self, options):
    """Replace the stored scoring options.

    Args:
        options: Value stored as-is on ``self.scoring_options``.

    Errors are logged, not raised.
    """
    try:
        setattr(self, 'scoring_options', options)
        self.logger.debug(f"Updated scoring options: {options}")
    except Exception as err:
        self.logger.error(f"Error setting scoring options: {str(err)}")
|
|
@@ -20,7 +20,7 @@
|
|
| 20 |
</property>
|
| 21 |
<widget class="QWidget" name="centralwidget">
|
| 22 |
<layout class="QGridLayout" name="gridLayout_2">
|
| 23 |
-
<item row="
|
| 24 |
<layout class="QHBoxLayout" name="boxlayhbotButtons">
|
| 25 |
<property name="sizeConstraint">
|
| 26 |
<enum>QLayout::SetDefaultConstraint</enum>
|
|
@@ -86,7 +86,7 @@
|
|
| 86 |
</item>
|
| 87 |
</layout>
|
| 88 |
</item>
|
| 89 |
-
<item row="
|
| 90 |
<layout class="QGridLayout" name="gridLayout">
|
| 91 |
<item row="0" column="0">
|
| 92 |
<layout class="QVBoxLayout" name="verticalLayout">
|
|
@@ -194,6 +194,33 @@
|
|
| 194 |
</item>
|
| 195 |
</layout>
|
| 196 |
</item>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
</layout>
|
| 198 |
</widget>
|
| 199 |
</item>
|
|
@@ -459,39 +486,6 @@
|
|
| 459 |
</item>
|
| 460 |
</layout>
|
| 461 |
</item>
|
| 462 |
-
<item row="6" column="0">
|
| 463 |
-
<layout class="QHBoxLayout" name="horizontalLayout_2">
|
| 464 |
-
<property name="spacing">
|
| 465 |
-
<number>-1</number>
|
| 466 |
-
</property>
|
| 467 |
-
<property name="sizeConstraint">
|
| 468 |
-
<enum>QLayout::SetDefaultConstraint</enum>
|
| 469 |
-
</property>
|
| 470 |
-
<property name="bottomMargin">
|
| 471 |
-
<number>0</number>
|
| 472 |
-
</property>
|
| 473 |
-
<item>
|
| 474 |
-
<spacer name="horizontalSpacer">
|
| 475 |
-
<property name="orientation">
|
| 476 |
-
<enum>Qt::Horizontal</enum>
|
| 477 |
-
</property>
|
| 478 |
-
<property name="sizeHint" stdset="0">
|
| 479 |
-
<size>
|
| 480 |
-
<width>40</width>
|
| 481 |
-
<height>20</height>
|
| 482 |
-
</size>
|
| 483 |
-
</property>
|
| 484 |
-
</spacer>
|
| 485 |
-
</item>
|
| 486 |
-
<item>
|
| 487 |
-
<widget class="QLabel" name="lblRequired">
|
| 488 |
-
<property name="text">
|
| 489 |
-
<string><html><head/><body><p><span style=" color:#fc0107;">* Required</span></p></body></html></string>
|
| 490 |
-
</property>
|
| 491 |
-
</widget>
|
| 492 |
-
</item>
|
| 493 |
-
</layout>
|
| 494 |
-
</item>
|
| 495 |
</layout>
|
| 496 |
</widget>
|
| 497 |
</widget>
|
|
|
|
| 20 |
</property>
|
| 21 |
<widget class="QWidget" name="centralwidget">
|
| 22 |
<layout class="QGridLayout" name="gridLayout_2">
|
| 23 |
+
<item row="10" column="0">
|
| 24 |
<layout class="QHBoxLayout" name="boxlayhbotButtons">
|
| 25 |
<property name="sizeConstraint">
|
| 26 |
<enum>QLayout::SetDefaultConstraint</enum>
|
|
|
|
| 86 |
</item>
|
| 87 |
</layout>
|
| 88 |
</item>
|
| 89 |
+
<item row="9" column="0">
|
| 90 |
<layout class="QGridLayout" name="gridLayout">
|
| 91 |
<item row="0" column="0">
|
| 92 |
<layout class="QVBoxLayout" name="verticalLayout">
|
|
|
|
| 194 |
</item>
|
| 195 |
</layout>
|
| 196 |
</item>
|
| 197 |
+
<item row="0" column="0">
|
| 198 |
+
<layout class="QHBoxLayout" name="horizontalLayout_2">
|
| 199 |
+
<property name="bottomMargin">
|
| 200 |
+
<number>0</number>
|
| 201 |
+
</property>
|
| 202 |
+
<item>
|
| 203 |
+
<spacer name="horizontalSpacer">
|
| 204 |
+
<property name="orientation">
|
| 205 |
+
<enum>Qt::Horizontal</enum>
|
| 206 |
+
</property>
|
| 207 |
+
<property name="sizeHint" stdset="0">
|
| 208 |
+
<size>
|
| 209 |
+
<width>40</width>
|
| 210 |
+
<height>20</height>
|
| 211 |
+
</size>
|
| 212 |
+
</property>
|
| 213 |
+
</spacer>
|
| 214 |
+
</item>
|
| 215 |
+
<item>
|
| 216 |
+
<widget class="QLabel" name="lblRequired">
|
| 217 |
+
<property name="text">
|
| 218 |
+
<string><html><head/><body><p><span style=" color:#fc0107;">* Required</span></p></body></html></string>
|
| 219 |
+
</property>
|
| 220 |
+
</widget>
|
| 221 |
+
</item>
|
| 222 |
+
</layout>
|
| 223 |
+
</item>
|
| 224 |
</layout>
|
| 225 |
</widget>
|
| 226 |
</item>
|
|
|
|
| 486 |
</item>
|
| 487 |
</layout>
|
| 488 |
</item>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
</layout>
|
| 490 |
</widget>
|
| 491 |
</widget>
|
|
@@ -15,56 +15,8 @@
|
|
| 15 |
</property>
|
| 16 |
<widget class="QWidget" name="centralwidget">
|
| 17 |
<layout class="QGridLayout" name="gridLayout">
|
| 18 |
-
<item row="
|
| 19 |
-
<
|
| 20 |
-
<property name="orientation">
|
| 21 |
-
<enum>Qt::Horizontal</enum>
|
| 22 |
-
</property>
|
| 23 |
-
<property name="sizeType">
|
| 24 |
-
<enum>QSizePolicy::Fixed</enum>
|
| 25 |
-
</property>
|
| 26 |
-
<property name="sizeHint" stdset="0">
|
| 27 |
-
<size>
|
| 28 |
-
<width>5</width>
|
| 29 |
-
<height>20</height>
|
| 30 |
-
</size>
|
| 31 |
-
</property>
|
| 32 |
-
</spacer>
|
| 33 |
-
</item>
|
| 34 |
-
<item row="2" column="1">
|
| 35 |
-
<spacer name="verticalSpacer">
|
| 36 |
-
<property name="orientation">
|
| 37 |
-
<enum>Qt::Vertical</enum>
|
| 38 |
-
</property>
|
| 39 |
-
<property name="sizeType">
|
| 40 |
-
<enum>QSizePolicy::Fixed</enum>
|
| 41 |
-
</property>
|
| 42 |
-
<property name="sizeHint" stdset="0">
|
| 43 |
-
<size>
|
| 44 |
-
<width>20</width>
|
| 45 |
-
<height>5</height>
|
| 46 |
-
</size>
|
| 47 |
-
</property>
|
| 48 |
-
</spacer>
|
| 49 |
-
</item>
|
| 50 |
-
<item row="1" column="0">
|
| 51 |
-
<spacer name="horizontalSpacer">
|
| 52 |
-
<property name="orientation">
|
| 53 |
-
<enum>Qt::Horizontal</enum>
|
| 54 |
-
</property>
|
| 55 |
-
<property name="sizeType">
|
| 56 |
-
<enum>QSizePolicy::Fixed</enum>
|
| 57 |
-
</property>
|
| 58 |
-
<property name="sizeHint" stdset="0">
|
| 59 |
-
<size>
|
| 60 |
-
<width>5</width>
|
| 61 |
-
<height>20</height>
|
| 62 |
-
</size>
|
| 63 |
-
</property>
|
| 64 |
-
</spacer>
|
| 65 |
-
</item>
|
| 66 |
-
<item row="1" column="1">
|
| 67 |
-
<widget class="QGroupBox" name="groupBox">
|
| 68 |
<property name="sizePolicy">
|
| 69 |
<sizepolicy hsizetype="Minimum" vsizetype="Minimum">
|
| 70 |
<horstretch>0</horstretch>
|
|
@@ -76,7 +28,7 @@
|
|
| 76 |
</property>
|
| 77 |
<layout class="QGridLayout" name="gridLayout_2">
|
| 78 |
<item row="0" column="0" colspan="4">
|
| 79 |
-
<widget class="QLabel" name="
|
| 80 |
<property name="sizePolicy">
|
| 81 |
<sizepolicy hsizetype="Preferred" vsizetype="Minimum">
|
| 82 |
<horstretch>0</horstretch>
|
|
@@ -92,21 +44,21 @@
|
|
| 92 |
</widget>
|
| 93 |
</item>
|
| 94 |
<item row="1" column="3">
|
| 95 |
-
<widget class="QPushButton" name="
|
| 96 |
<property name="text">
|
| 97 |
<string>Browse...</string>
|
| 98 |
</property>
|
| 99 |
</widget>
|
| 100 |
</item>
|
| 101 |
<item row="2" column="0">
|
| 102 |
-
<widget class="QLabel" name="
|
| 103 |
<property name="text">
|
| 104 |
<string>Select Algorithm:</string>
|
| 105 |
</property>
|
| 106 |
</widget>
|
| 107 |
</item>
|
| 108 |
<item row="1" column="0">
|
| 109 |
-
<widget class="QLabel" name="
|
| 110 |
<property name="sizePolicy">
|
| 111 |
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
|
| 112 |
<horstretch>0</horstretch>
|
|
@@ -122,7 +74,7 @@
|
|
| 122 |
</widget>
|
| 123 |
</item>
|
| 124 |
<item row="2" column="1">
|
| 125 |
-
<widget class="QRadioButton" name="
|
| 126 |
<property name="text">
|
| 127 |
<string>Azimuth 2.0</string>
|
| 128 |
</property>
|
|
@@ -131,22 +83,15 @@
|
|
| 131 |
</property>
|
| 132 |
</widget>
|
| 133 |
</item>
|
| 134 |
-
<item row="4" column="0" colspan="3">
|
| 135 |
-
<widget class="QProgressBar" name="progressBar">
|
| 136 |
-
<property name="value">
|
| 137 |
-
<number>0</number>
|
| 138 |
-
</property>
|
| 139 |
-
</widget>
|
| 140 |
-
</item>
|
| 141 |
<item row="4" column="3">
|
| 142 |
-
<widget class="QPushButton" name="
|
| 143 |
<property name="text">
|
| 144 |
<string>Submit</string>
|
| 145 |
</property>
|
| 146 |
</widget>
|
| 147 |
</item>
|
| 148 |
<item row="1" column="1" colspan="2">
|
| 149 |
-
<widget class="QLineEdit" name="
|
| 150 |
<property name="readOnly">
|
| 151 |
<bool>true</bool>
|
| 152 |
</property>
|
|
@@ -155,22 +100,6 @@
|
|
| 155 |
</layout>
|
| 156 |
</widget>
|
| 157 |
</item>
|
| 158 |
-
<item row="0" column="1">
|
| 159 |
-
<spacer name="verticalSpacer_3">
|
| 160 |
-
<property name="orientation">
|
| 161 |
-
<enum>Qt::Vertical</enum>
|
| 162 |
-
</property>
|
| 163 |
-
<property name="sizeType">
|
| 164 |
-
<enum>QSizePolicy::Fixed</enum>
|
| 165 |
-
</property>
|
| 166 |
-
<property name="sizeHint" stdset="0">
|
| 167 |
-
<size>
|
| 168 |
-
<width>20</width>
|
| 169 |
-
<height>5</height>
|
| 170 |
-
</size>
|
| 171 |
-
</property>
|
| 172 |
-
</spacer>
|
| 173 |
-
</item>
|
| 174 |
</layout>
|
| 175 |
</widget>
|
| 176 |
</widget>
|
|
|
|
| 15 |
</property>
|
| 16 |
<widget class="QWidget" name="centralwidget">
|
| 17 |
<layout class="QGridLayout" name="gridLayout">
|
| 18 |
+
<item row="0" column="0">
|
| 19 |
+
<widget class="QGroupBox" name="grpSelectScoring">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
<property name="sizePolicy">
|
| 21 |
<sizepolicy hsizetype="Minimum" vsizetype="Minimum">
|
| 22 |
<horstretch>0</horstretch>
|
|
|
|
| 28 |
</property>
|
| 29 |
<layout class="QGridLayout" name="gridLayout_2">
|
| 30 |
<item row="0" column="0" colspan="4">
|
| 31 |
+
<widget class="QLabel" name="lblSelectFASTAfile">
|
| 32 |
<property name="sizePolicy">
|
| 33 |
<sizepolicy hsizetype="Preferred" vsizetype="Minimum">
|
| 34 |
<horstretch>0</horstretch>
|
|
|
|
| 44 |
</widget>
|
| 45 |
</item>
|
| 46 |
<item row="1" column="3">
|
| 47 |
+
<widget class="QPushButton" name="pbtnBrowse">
|
| 48 |
<property name="text">
|
| 49 |
<string>Browse...</string>
|
| 50 |
</property>
|
| 51 |
</widget>
|
| 52 |
</item>
|
| 53 |
<item row="2" column="0">
|
| 54 |
+
<widget class="QLabel" name="lblSelectAlgorithm">
|
| 55 |
<property name="text">
|
| 56 |
<string>Select Algorithm:</string>
|
| 57 |
</property>
|
| 58 |
</widget>
|
| 59 |
</item>
|
| 60 |
<item row="1" column="0">
|
| 61 |
+
<widget class="QLabel" name="lblInputFasta">
|
| 62 |
<property name="sizePolicy">
|
| 63 |
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
|
| 64 |
<horstretch>0</horstretch>
|
|
|
|
| 74 |
</widget>
|
| 75 |
</item>
|
| 76 |
<item row="2" column="1">
|
| 77 |
+
<widget class="QRadioButton" name="rbtnAzimuth">
|
| 78 |
<property name="text">
|
| 79 |
<string>Azimuth 2.0</string>
|
| 80 |
</property>
|
|
|
|
| 83 |
</property>
|
| 84 |
</widget>
|
| 85 |
</item>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
<item row="4" column="3">
|
| 87 |
+
<widget class="QPushButton" name="pbtnSubmit">
|
| 88 |
<property name="text">
|
| 89 |
<string>Submit</string>
|
| 90 |
</property>
|
| 91 |
</widget>
|
| 92 |
</item>
|
| 93 |
<item row="1" column="1" colspan="2">
|
| 94 |
+
<widget class="QLineEdit" name="ledInputFASTA">
|
| 95 |
<property name="readOnly">
|
| 96 |
<bool>true</bool>
|
| 97 |
</property>
|
|
|
|
| 100 |
</layout>
|
| 101 |
</widget>
|
| 102 |
</item>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
</layout>
|
| 104 |
</widget>
|
| 105 |
</widget>
|
|
@@ -6,7 +6,7 @@
|
|
| 6 |
<rect>
|
| 7 |
<x>0</x>
|
| 8 |
<y>0</y>
|
| 9 |
-
<width>
|
| 10 |
<height>916</height>
|
| 11 |
</rect>
|
| 12 |
</property>
|
|
@@ -115,8 +115,8 @@
|
|
| 115 |
<string>Guide Viewer</string>
|
| 116 |
</property>
|
| 117 |
<layout class="QGridLayout" name="gridLayout_4">
|
| 118 |
-
<item row="
|
| 119 |
-
<widget class="
|
| 120 |
<property name="sizePolicy">
|
| 121 |
<sizepolicy hsizetype="Fixed" vsizetype="Fixed">
|
| 122 |
<horstretch>0</horstretch>
|
|
@@ -124,14 +124,21 @@
|
|
| 124 |
</sizepolicy>
|
| 125 |
</property>
|
| 126 |
<property name="text">
|
| 127 |
-
<string>
|
| 128 |
</property>
|
| 129 |
</widget>
|
| 130 |
</item>
|
| 131 |
-
<item row="
|
| 132 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
<item>
|
| 134 |
-
<widget class="QComboBox" name="
|
| 135 |
<property name="sizePolicy">
|
| 136 |
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
|
| 137 |
<horstretch>0</horstretch>
|
|
@@ -148,8 +155,27 @@
|
|
| 148 |
</item>
|
| 149 |
</layout>
|
| 150 |
</item>
|
| 151 |
-
<item row="
|
| 152 |
-
<widget class="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
</item>
|
| 154 |
<item row="5" column="0">
|
| 155 |
<widget class="QLabel" name="lblEndonuclease">
|
|
@@ -164,32 +190,26 @@
|
|
| 164 |
</property>
|
| 165 |
</widget>
|
| 166 |
</item>
|
| 167 |
-
<item row="
|
| 168 |
-
<widget class="
|
|
|
|
|
|
|
|
|
|
| 169 |
<property name="sizePolicy">
|
| 170 |
-
<sizepolicy hsizetype="
|
| 171 |
<horstretch>0</horstretch>
|
| 172 |
<verstretch>0</verstretch>
|
| 173 |
</sizepolicy>
|
| 174 |
</property>
|
| 175 |
-
<property name="minimumSize">
|
| 176 |
-
<size>
|
| 177 |
-
<width>125</width>
|
| 178 |
-
<height>0</height>
|
| 179 |
-
</size>
|
| 180 |
-
</property>
|
| 181 |
-
<property name="toolTip">
|
| 182 |
-
<string><html><head/><body><p><span style=" font-size:12pt;">Additional options for filtering the Guide Viewer Table.</span></p></body></html></string>
|
| 183 |
-
</property>
|
| 184 |
<property name="text">
|
| 185 |
-
<string>
|
| 186 |
</property>
|
| 187 |
</widget>
|
| 188 |
</item>
|
| 189 |
-
<item row="
|
| 190 |
-
<layout class="QHBoxLayout" name="
|
| 191 |
<item>
|
| 192 |
-
<widget class="QComboBox" name="
|
| 193 |
<property name="sizePolicy">
|
| 194 |
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
|
| 195 |
<horstretch>0</horstretch>
|
|
@@ -206,26 +226,6 @@
|
|
| 206 |
</item>
|
| 207 |
</layout>
|
| 208 |
</item>
|
| 209 |
-
<item row="8" column="2">
|
| 210 |
-
<widget class="QPushButton" name="pbtnScoringOptions">
|
| 211 |
-
<property name="text">
|
| 212 |
-
<string>Scoring Options</string>
|
| 213 |
-
</property>
|
| 214 |
-
</widget>
|
| 215 |
-
</item>
|
| 216 |
-
<item row="4" column="0">
|
| 217 |
-
<widget class="QLabel" name="lblGene">
|
| 218 |
-
<property name="sizePolicy">
|
| 219 |
-
<sizepolicy hsizetype="Fixed" vsizetype="Fixed">
|
| 220 |
-
<horstretch>0</horstretch>
|
| 221 |
-
<verstretch>0</verstretch>
|
| 222 |
-
</sizepolicy>
|
| 223 |
-
</property>
|
| 224 |
-
<property name="text">
|
| 225 |
-
<string>Gene:</string>
|
| 226 |
-
</property>
|
| 227 |
-
</widget>
|
| 228 |
-
</item>
|
| 229 |
</layout>
|
| 230 |
</widget>
|
| 231 |
</item>
|
|
|
|
| 6 |
<rect>
|
| 7 |
<x>0</x>
|
| 8 |
<y>0</y>
|
| 9 |
+
<width>1315</width>
|
| 10 |
<height>916</height>
|
| 11 |
</rect>
|
| 12 |
</property>
|
|
|
|
| 115 |
<string>Guide Viewer</string>
|
| 116 |
</property>
|
| 117 |
<layout class="QGridLayout" name="gridLayout_4">
|
| 118 |
+
<item row="4" column="0">
|
| 119 |
+
<widget class="QLabel" name="lblGene">
|
| 120 |
<property name="sizePolicy">
|
| 121 |
<sizepolicy hsizetype="Fixed" vsizetype="Fixed">
|
| 122 |
<horstretch>0</horstretch>
|
|
|
|
| 124 |
</sizepolicy>
|
| 125 |
</property>
|
| 126 |
<property name="text">
|
| 127 |
+
<string>Gene:</string>
|
| 128 |
</property>
|
| 129 |
</widget>
|
| 130 |
</item>
|
| 131 |
+
<item row="8" column="2">
|
| 132 |
+
<widget class="QPushButton" name="pbtnScoringOptions">
|
| 133 |
+
<property name="text">
|
| 134 |
+
<string>Scoring Options</string>
|
| 135 |
+
</property>
|
| 136 |
+
</widget>
|
| 137 |
+
</item>
|
| 138 |
+
<item row="5" column="1" colspan="4">
|
| 139 |
+
<layout class="QHBoxLayout" name="horizontalLayout_2">
|
| 140 |
<item>
|
| 141 |
+
<widget class="QComboBox" name="cmbEndonuclease">
|
| 142 |
<property name="sizePolicy">
|
| 143 |
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
|
| 144 |
<horstretch>0</horstretch>
|
|
|
|
| 155 |
</item>
|
| 156 |
</layout>
|
| 157 |
</item>
|
| 158 |
+
<item row="8" column="1" alignment="Qt::AlignLeft">
|
| 159 |
+
<widget class="QPushButton" name="pbtnFilterOptions">
|
| 160 |
+
<property name="sizePolicy">
|
| 161 |
+
<sizepolicy hsizetype="Preferred" vsizetype="Fixed">
|
| 162 |
+
<horstretch>0</horstretch>
|
| 163 |
+
<verstretch>0</verstretch>
|
| 164 |
+
</sizepolicy>
|
| 165 |
+
</property>
|
| 166 |
+
<property name="minimumSize">
|
| 167 |
+
<size>
|
| 168 |
+
<width>125</width>
|
| 169 |
+
<height>0</height>
|
| 170 |
+
</size>
|
| 171 |
+
</property>
|
| 172 |
+
<property name="toolTip">
|
| 173 |
+
<string><html><head/><body><p><span style=" font-size:12pt;">Additional options for filtering the Guide Viewer Table.</span></p></body></html></string>
|
| 174 |
+
</property>
|
| 175 |
+
<property name="text">
|
| 176 |
+
<string>Filter Options</string>
|
| 177 |
+
</property>
|
| 178 |
+
</widget>
|
| 179 |
</item>
|
| 180 |
<item row="5" column="0">
|
| 181 |
<widget class="QLabel" name="lblEndonuclease">
|
|
|
|
| 190 |
</property>
|
| 191 |
</widget>
|
| 192 |
</item>
|
| 193 |
+
<item row="9" column="0" colspan="5">
|
| 194 |
+
<widget class="QTableWidget" name="tblTargets"/>
|
| 195 |
+
</item>
|
| 196 |
+
<item row="8" column="0">
|
| 197 |
+
<widget class="QCheckBox" name="chkSelectAll">
|
| 198 |
<property name="sizePolicy">
|
| 199 |
+
<sizepolicy hsizetype="Fixed" vsizetype="Fixed">
|
| 200 |
<horstretch>0</horstretch>
|
| 201 |
<verstretch>0</verstretch>
|
| 202 |
</sizepolicy>
|
| 203 |
</property>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
<property name="text">
|
| 205 |
+
<string>Select All</string>
|
| 206 |
</property>
|
| 207 |
</widget>
|
| 208 |
</item>
|
| 209 |
+
<item row="4" column="1" colspan="4">
|
| 210 |
+
<layout class="QHBoxLayout" name="horizontalLayout">
|
| 211 |
<item>
|
| 212 |
+
<widget class="QComboBox" name="cmbGene">
|
| 213 |
<property name="sizePolicy">
|
| 214 |
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
|
| 215 |
<horstretch>0</horstretch>
|
|
|
|
| 226 |
</item>
|
| 227 |
</layout>
|
| 228 |
</item>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
</layout>
|
| 230 |
</widget>
|
| 231 |
</item>
|
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
class LoggingMixin:
    """Mixin that gives a class a logger named after its concrete class.

    The logger is created on demand, so the mixin keeps working when a
    subclass defines its own ``__init__`` and never calls
    ``LoggingMixin.__init__``.
    """

    def __init__(self):
        self._logger: Optional[logging.Logger] = None
        self._init_logger()

    def _init_logger(self) -> None:
        """Bind ``self._logger`` to the logger named after the runtime class."""
        self._logger = logging.getLogger(self.__class__.__name__)

    @property
    def logger(self) -> logging.Logger:
        """Return the logger instance, creating it on first access if needed."""
        # getattr (rather than plain attribute access) covers instances whose
        # __init__ chain skipped LoggingMixin.__init__ and so never set _logger.
        if getattr(self, "_logger", None) is None:
            self._init_logger()
        return self._logger

    def log_method_call(self, method_name: str, *args, **kwargs) -> None:
        """Log, at DEBUG level, a call to ``method_name`` with its arguments."""
        # Lazy %-formatting: the arguments are only rendered when DEBUG is on.
        self.logger.debug("Calling %s with args: %s, kwargs: %s", method_name, args, kwargs)

    def log_error(self, method_name: str, error: Exception) -> None:
        """Log ``error`` at ERROR level together with the method it came from.

        The traceback attached to ``error`` itself is logged, so the output is
        meaningful even when this is called outside an ``except`` block.
        """
        self.logger.error("Error in %s: %s", method_name, error, exc_info=error)

    def log_info(self, message: str) -> None:
        """Log ``message`` at INFO level."""
        self.logger.info(message)

    def log_debug(self, message: str) -> None:
        """Log ``message`` at DEBUG level."""
        self.logger.debug(message)

    def log_warning(self, message: str) -> None:
        """Log ``message`` at WARNING level."""
        self.logger.warning(message)
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,546 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas
|
| 2 |
+
import time
|
| 3 |
+
import sklearn
|
| 4 |
+
import numpy as np
|
| 5 |
+
import Bio.SeqUtils as SeqUtil
|
| 6 |
+
import Bio.Seq as Seq
|
| 7 |
+
import azimuth.util
|
| 8 |
+
import sys
|
| 9 |
+
from Bio.SeqUtils import MeltingTemp as Tm
|
| 10 |
+
import pickle
|
| 11 |
+
import itertools
|
| 12 |
+
|
| 13 |
+
def featurize_data(data, learn_options, Y, gene_position, pam_audit=True, length_audit=True, quiet=True):
    '''
    Build the dictionary of feature sets for a table of guides.

    assumes that data contains the 30mer
    returns set of features from which one can make a kernel for each one

    data:           table with a '30mer' column; 'sgRNA Score' and 'Strand'
                    columns are read when the matching options are enabled
    learn_options:  dict of switches; every "include_*"/"*_features" key read
                    below must be present, plus "order" and 'normalize_features'
    Y:              table read for 'Target gene', 'test' and the 'drug' index
                    level when the matching options are enabled
    gene_position:  table whose columns each become a feature set; must contain
                    "Percent Peptide" when "include_gene_position" is set
    pam_audit:      forwarded to the NGGX and Tm feature builders
    length_audit:   forwarded to the GC feature builder
    quiet:          suppress progress printing when True

    Returns a dict mapping feature-set name to its table of features.
    Raises AssertionError if the 30mers do not all share one length.
    '''
    all_lens = data['30mer'].apply(len).values
    unique_lengths = np.unique(all_lens)
    num_lengths = len(unique_lengths)
    assert num_lengths == 1, "should only have sequences of a single length, but found %s: %s" % (num_lengths, str(unique_lengths))

    if not quiet:
        print("Constructing features...")
    t0 = time.time()

    feature_sets = {}

    if learn_options["nuc_features"]:
        # spectrum kernels (position-independent) and weighted degree kernels (position-dependent)
        get_all_order_nuc_features(data['30mer'], feature_sets, learn_options, learn_options["order"], max_index_to_use=30, quiet=quiet)

    check_feature_set(feature_sets)

    if learn_options["gc_features"]:
        gc_above_10, gc_below_10, gc_count = gc_features(data, length_audit)
        feature_sets['gc_above_10'] = pandas.DataFrame(gc_above_10)
        feature_sets['gc_below_10'] = pandas.DataFrame(gc_below_10)
        feature_sets['gc_count'] = pandas.DataFrame(gc_count)

    if learn_options["include_gene_position"]:
        # one feature set per gene-position column (loop variable renamed so
        # that it no longer shadows the builtin ``set``)
        for column in gene_position.columns:
            feature_sets[column] = pandas.DataFrame(gene_position[column])
        feature_sets["Percent Peptide <50%"] = feature_sets["Percent Peptide"] < 50
        # rename the single boolean column to match the feature-set name
        feature_sets["Percent Peptide <50%"]['Percent Peptide <50%'] = feature_sets["Percent Peptide <50%"].pop("Percent Peptide")

    if learn_options["include_gene_effect"]:
        # ``import sklearn`` alone does not guarantee the submodule is loaded
        from sklearn.preprocessing import LabelEncoder, OneHotEncoder
        print("including gene effect")
        gene_names = Y['Target gene']
        enc = OneHotEncoder()
        label_encoder = LabelEncoder()
        label_encoder.fit(gene_names)
        one_hot_genes = np.array(enc.fit_transform(label_encoder.transform(gene_names)[:, None]).todense())
        feature_sets["gene effect"] = pandas.DataFrame(one_hot_genes,
                                                       columns=["gene_%d" % i for i in range(one_hot_genes.shape[1])], index=gene_names.index)

    if learn_options['include_known_pairs']:
        feature_sets['known pairs'] = pandas.DataFrame(Y['test'])

    if learn_options["include_NGGX_interaction"]:
        feature_sets["NGGX"] = NGGX_interaction_feature(data, pam_audit)

    if learn_options["include_Tm"]:
        feature_sets["Tm"] = Tm_feature(data, pam_audit, learn_options=None)

    if learn_options["include_sgRNAscore"]:
        feature_sets["sgRNA Score"] = pandas.DataFrame(data["sgRNA Score"])

    if learn_options["include_drug"]:
        from sklearn.preprocessing import LabelEncoder, OneHotEncoder
        drug_names = Y.index.get_level_values('drug').tolist()
        enc = OneHotEncoder()
        label_encoder = LabelEncoder()
        label_encoder.fit(drug_names)
        one_hot_drugs = np.array(enc.fit_transform(label_encoder.transform(drug_names)[:, None]).todense())
        feature_sets["drug"] = pandas.DataFrame(one_hot_drugs, columns=["drug_%d" % i for i in range(one_hot_drugs.shape[1])], index=drug_names)

    if learn_options['include_strand']:
        feature_sets['Strand effect'] = (pandas.DataFrame(data['Strand']) == 'sense')*1

    if learn_options["include_gene_feature"]:
        feature_sets["gene features"] = gene_feature(Y, data, learn_options)

    if learn_options["include_gene_guide_feature"] > 0:
        tmp_feature_sets = gene_guide_feature(Y, data, learn_options)
        for key in tmp_feature_sets:
            feature_sets[key] = tmp_feature_sets[key]

    if learn_options["include_microhomology"]:
        feature_sets["microhomology"] = get_micro_homology_features(Y['Target gene'], learn_options, data)

    t1 = time.time()
    if not quiet:
        print("\t\tElapsed time for constructing features is %.2f seconds" % (t1-t0))

    check_feature_set(feature_sets)

    if learn_options['normalize_features']:
        # NOTE(review): the original guarded this branch with
        # ``assert("should not be here as doesn't make sense when we make
        # one-off predictions, ...")``. Asserting a non-empty string is always
        # true, so the branch has always run; that behaviour is kept here.
        # Confirm whether normalisation should really be rejected.
        feature_sets = normalize_feature_sets(feature_sets)
        check_feature_set(feature_sets)

    return feature_sets
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def check_feature_set(feature_sets):
    '''
    Ensure the # of individuals (rows) is the same in each feature set,
    that there is at least one, and that no feature set contains NaN.

    feature_sets: dict mapping feature-set name to a table exposing ``.shape``

    Raises AssertionError when the dict is empty, when the first feature set
    has no rows, or when row counts differ; raises Exception when a NaN is
    found. Returns None otherwise.
    '''
    # Explicit raises instead of ``assert`` so the checks survive ``python -O``;
    # the exception type is unchanged for callers.
    if feature_sets == {}:
        raise AssertionError("no feature sets present")

    expected_rows = None
    for features in feature_sets.values():
        rows = features.shape[0]
        if expected_rows is None:
            expected_rows = rows
            # Checked on the first set so that a lone, empty feature set is
            # rejected too (previously this was only tested from the second
            # feature set onwards).
            if expected_rows < 1:
                raise AssertionError("should be at least one individual")
        elif rows != expected_rows:
            raise AssertionError("# of individuals do not match up across feature sets")

    for name, features in feature_sets.items():
        # np.asarray makes the reduction independent of how pandas implements
        # np.any on a DataFrame
        if np.any(np.asarray(np.isnan(features))):
            raise Exception("found Nan in set %s" % name)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def NGGX_interaction_feature(data, pam_audit=True):
    '''
    Encode the two nucleotides flanking the GG of the PAM in every 30mer.

    For each sequence in data['30mer'] the characters at index 24 and 27
    (the N and X of NGGX) are joined and passed to nucleotide_features as an
    order-2, position-dependent feature with prefix "NGGX".

    pam_audit: when True, raise Exception if index 25:27 is not "GG".

    Returns the per-sequence encodings stacked as rows (one row per 30mer).
    '''
    feat_NX = pandas.DataFrame()
    for thirty_mer in data['30mer'].values:
        # check that GG is where we think
        if pam_audit and thirty_mer[25:27] != "GG":
            raise Exception("expected GG but found %s" % thirty_mer[25:27])
        flanking_pair = thirty_mer[24] + thirty_mer[27]
        encoded_pair = nucleotide_features(flanking_pair, order=2, feature_type='pos_dependent', max_index_to_use=2, prefix="NGGX")
        # NOTE(review): growing the frame one column at a time re-copies it on
        # every iteration; kept as is because batching the concat could change
        # the resulting labels.
        feat_NX = pandas.concat([feat_NX, encoded_pair], axis=1)
    return feat_NX.T
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def get_all_order_nuc_features(data, feature_sets, learn_options, maxorder, max_index_to_use, prefix="", quiet=False):
    '''
    Add nucleotide feature sets of order 1..maxorder to feature_sets in place.

    For every order the position-dependent features are stored under
    '<prefix>_nuc_pd_Order<k>'; the position-independent ones are stored under
    '<prefix>_nuc_pi_Order<k>' only when learn_options['include_pi_nuc_feat']
    is set. feature_sets is re-validated after each order. Returns None.
    '''
    verbose = not quiet
    for k in range(1, maxorder + 1):
        if verbose:
            print("\t\tconstructing order %s features" % k)

        pos_dependent, pos_independent = apply_nucleotide_features(
            data, k, learn_options["num_proc"],
            include_pos_independent=True, max_index_to_use=max_index_to_use, prefix=prefix)

        feature_sets['%s_nuc_pd_Order%i' % (prefix, k)] = pos_dependent
        if learn_options['include_pi_nuc_feat']:
            feature_sets['%s_nuc_pi_Order%i' % (prefix, k)] = pos_independent
        check_feature_set(feature_sets)

        if verbose:
            print("\t\t\t\t\t\t\tdone")
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def countGC(s, length_audit=True):
    '''
    GC content for only the 20mer, as per the Doench paper/code

    Counts every character of s[4:24] that is neither 'A' nor 'T'.
    When length_audit is True, asserts that s has exactly 30 characters.
    '''
    if length_audit:
        assert len(s) == 30, "seems to assume 30mer"
    guide = s[4:24]
    return len(guide) - guide.count('A') - guide.count('T')
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def SeqUtilFeatures(data):
    '''
    assuming '30mer' is a key
    compute one feature per sequence: the molecular weight of the whole 30mer,
    as returned by Bio.SeqUtils.molecular_weight with its default arguments

    (The previous docstring described melting-temperature features; none are
    computed here.)

    Returns a single-column DataFrame with one row per 30mer.
    Raises AssertionError if a sequence is not 30 characters long.
    '''
    sequence = data['30mer'].values
    num_features = 1
    featarray = np.ones((sequence.shape[0], num_features))
    for i, seq in enumerate(sequence):
        assert len(seq) == 30, "seems to assume 30mer"
        featarray[i, 0] = SeqUtil.molecular_weight(str(seq))

    return pandas.DataFrame(featarray)
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def organism_feature(data):
    '''
    Human vs. mouse

    One-hot encode data['Organism']: one float column per distinct organism
    label, named 'organism_<label>', indexed like data.

    NOTE(review): the previous body referenced an undefined ``featarray`` and
    dropped into an ipdb breakpoint, so it could never return. The one-hot
    encoding here mirrors how the gene and drug effects are encoded elsewhere
    in this file; confirm it is the intended representation.
    '''
    return pandas.get_dummies(data['Organism'], prefix='organism').astype(float)
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
def get_micro_homology_features(gene_names, learn_options, X):
    '''
    Compute a microhomology score and an out-of-frame score for every guide.

    gene_names:    Series giving the target gene of each guide, row-aligned
                   with X by position
    learn_options: unused here; kept for interface compatibility
    X:             table with '30mer' and 'Strand' columns

    For each guide, the 30mer is located in the gene sequence (trying the
    reverse complement when it is not found), extended by 9 nt on the left
    and 21 nt on the right into a 60mer, and scored. Guides that cannot be
    located get a score of 0 for both features.

    Returns a float DataFrame indexed like X with columns "mh_score" and
    "oof_score".
    '''
    # originally was flipping the guide itself as necessary, but now flipping the gene instead

    print("building microhomology features")
    feat = pandas.DataFrame(index=X.index)
    feat["mh_score"] = ""
    feat["oof_score"] = ""
    mh_col = feat.columns.get_loc("mh_score")
    oof_col = feat.columns.get_loc("oof_score")

    # number of nucleotides to take to the left and right of the guide
    k_mer_length_left = 9
    k_mer_length_right = 21
    for gene in gene_names.unique():
        # the file imports ``azimuth.util``; a bare ``util`` is not bound
        gene_seq = Seq.Seq(azimuth.util.get_gene_sequence(gene)).reverse_complement()
        guide_inds = np.where(gene_names.values == gene)[0]
        print("getting microhomology for all %d guides in gene %s" % (len(guide_inds), gene))
        for ps in guide_inds:
            # ps is a row position (from np.where), so index positionally
            guide_seq = Seq.Seq(X['30mer'].iloc[ps])
            strand = X['Strand'].iloc[ps]
            if strand == 'sense':
                gene_seq = gene_seq.reverse_complement()
            # figure out the sequence to the left and right of this guide, in the gene
            ind = gene_seq.find(guide_seq)
            if ind == -1:
                # not found in this orientation: retry on the other strand
                gene_seq = gene_seq.reverse_complement()
                ind = gene_seq.find(guide_seq)

            if ind == -1:
                # guide could not be located in the gene at all
                mh_score = 0
                oof_score = 0
            else:
                assert gene_seq[ind:(ind+len(guide_seq))] == guide_seq, "match not right"
                left_win = gene_seq[(ind - k_mer_length_left):ind]
                right_win = gene_seq[(ind + len(guide_seq)):(ind + len(guide_seq) + k_mer_length_right)]

                # a guide too close to either end of the gene yields a short window
                assert len(left_win) == k_mer_length_left
                assert len(right_win) == k_mer_length_right

                sixtymer = str(left_win) + str(guide_seq) + str(right_win)
                assert len(sixtymer) == 60, "should be of length 60"
                # NOTE(review): ``microhomology`` is not among the imports
                # visible at the top of this file -- confirm it is importable
                # before enabling the "include_microhomology" option.
                mh_score, oof_score = microhomology.compute_score(sixtymer)

            feat.iloc[ps, mh_col] = mh_score
            feat.iloc[ps, oof_col] = oof_score
        print("computed microhomology of %s" % (str(gene)))

    return pandas.DataFrame(feat, dtype='float')
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
def local_gene_seq_features(gene_names, learn_options, X):
    '''
    Build nucleotide features from the gene sequence flanking each guide.

    For every row, the guide ('30mer' column of X) is located inside the
    reverse complement of its gene sequence and the
    learn_options['include_gene_guide_feature'] nucleotides on either side
    are extracted. The two windows are then featurized with
    get_all_order_nuc_features.

    gene_names    -- pandas Series of gene names, positionally aligned with X
    learn_options -- dict; reads 'include_gene_guide_feature' and 'order'
    X             -- DataFrame with '30mer' and 'Strand' columns

    Returns the feature_sets dict filled in by get_all_order_nuc_features.
    Raises AssertionError when a guide cannot be found in its gene or when a
    flanking window is empty / the two windows differ in length.
    '''

    print("building local gene sequence features")

    # number of nucleotides to take to the left and right of the guide
    k_mer_length = learn_options['include_gene_guide_feature']

    # `ps` below is a *position* (it comes from np.where on the raw values), so
    # the columns are read positionally as plain arrays and the results are
    # collected in positional lists. This replaces the removed `.ix` indexer.
    thirty_mers = X['30mer'].values
    strands = X['Strand'].values
    n_rows = len(X.index)
    left_windows = [""] * n_rows
    right_windows = [""] * n_rows

    for gene in gene_names.unique():
        gene_seq = Seq.Seq(util.get_gene_sequence(gene)).reverse_complement()
        for ps in np.where(gene_names.values == gene)[0]:
            guide_seq = Seq.Seq(thirty_mers[ps])
            strand = strands[ps]
            if strand == 'sense':
                guide_seq = guide_seq.reverse_complement()

            # figure out the sequence to the left and right of this guide, in the gene
            ind = gene_seq.find(guide_seq)
            assert ind != -1, "could not find guide in gene"
            assert gene_seq[ind:(ind + len(guide_seq))] == guide_seq, "match not right"
            left_win = gene_seq[(ind - k_mer_length):ind]
            right_win = gene_seq[(ind + len(guide_seq)):(ind + len(guide_seq) + k_mer_length)]

            if strand == 'antisense':
                # it's arbitrary which of sense and anti-sense we flip, we just want
                # to keep them in the same relative alphabet/direction
                left_win = left_win.reverse_complement()
                right_win = right_win.reverse_complement()

            # str() instead of Seq.tostring(), which newer Biopython no longer has
            left_str = str(left_win)
            right_str = str(right_win)
            assert not left_str == "", "k_mer_context, %s, is too large" % k_mer_length
            # the original checked left_win twice; the second check is meant for right_win
            assert not right_str == "", "k_mer_context, %s, is too large" % k_mer_length
            assert len(left_win) == len(right_win), "k_mer_context, %s, is too large" % k_mer_length
            left_windows[ps] = left_str
            right_windows[ps] = right_str
        print("featurizing local context of %s" % (gene))

    feat = pandas.DataFrame(index=X.index)
    feat["gene_left_win"] = left_windows
    feat["gene_right_win"] = right_windows

    feature_sets = {}
    get_all_order_nuc_features(feat["gene_left_win"], feature_sets, learn_options, learn_options["order"], max_index_to_use=sys.maxsize, prefix="gene_left_win")
    get_all_order_nuc_features(feat["gene_right_win"], feature_sets, learn_options, learn_options["order"], max_index_to_use=sys.maxsize, prefix="gene_right_win")
    return feature_sets
|
| 309 |
+
|
| 310 |
+
def gene_feature(Y, X, learn_options):
    '''
    Whole-gene descriptors for every row of Y: sequence length, GC content,
    melting temperature and molecular weight of the target gene.

    Only Y['Target gene'] is read; X and learn_options are accepted but unused.
    Returns a DataFrame indexed like Y['Target gene'] with the columns
    'gene length', 'gene GC content', 'gene temperature' and
    'gene molecular weight'.
    '''

    gene_names = Y['Target gene']

    gene_length = np.zeros((gene_names.values.shape[0], 1))
    gc_content = np.zeros((gene_names.shape[0], 1))
    temperature = np.zeros((gene_names.shape[0], 1))
    molecular_weight = np.zeros((gene_names.shape[0], 1))

    for gene in gene_names.unique():
        seq = util.get_gene_sequence(gene)
        # every row that targets this gene receives the same four values
        rows = gene_names.values == gene
        gene_length[rows] = len(seq)
        gc_content[rows] = SeqUtil.GC(seq)
        temperature[rows] = Tm.Tm_NN(seq, nn_table=Tm.DNA_NN3)
        molecular_weight[rows] = SeqUtil.molecular_weight(seq, 'DNA')

    stacked = np.concatenate((gene_length, gc_content, temperature, molecular_weight), axis=1)
    column_names = ['gene length',
                    'gene GC content',
                    'gene temperature',
                    'gene molecular weight']
    return pandas.DataFrame(data=stacked, index=gene_names.index, columns=column_names)
|
| 335 |
+
|
| 336 |
+
def gene_guide_feature(Y, X, learn_options):
    '''
    Features tied to the part of the gene local to the guide, possibly
    incorporating the guide or interactions with it.

    The features are always recomputed with local_gene_seq_features and then
    pickled to a file whose name is derived from learn_options ('V',
    'include_gene_guide_feature', 'order'). Returns the feature_sets dict.
    '''
    # expensive, so pickle if necessary
    gene_file = r"..\data\gene_seq_feat_V%s_km%s.ord%s.pickle" % (learn_options['V'], learn_options['include_gene_guide_feature'], learn_options['order'])

    # Loading from the pickle is switched off: the original guard was a literal
    # False with `os.path.isfile(gene_file)` commented out "while debugging".
    load_from_cache = False
    if load_from_cache:
        print("loading local gene seq feats from file %s" % gene_file)
        with open(gene_file, "rb") as f:
            feature_sets = pickle.load(f)
        return feature_sets

    feature_sets = local_gene_seq_features(Y['Target gene'], learn_options, X)
    print("writing local gene seq feats to file %s" % gene_file)
    with open(gene_file, "wb") as f:
        pickle.dump(feature_sets, f)

    return feature_sets
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
def gc_cont(seq):
    '''
    Fraction of positions in seq that are 'G' or 'C' (upper case only).
    Raises ZeroDivisionError for an empty sequence.
    '''
    gc_total = sum(seq.count(base) for base in ('G', 'C'))
    return gc_total / float(len(seq))
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
def Tm_feature(data, pam_audit=True, learn_options=None):
    '''
    Melting-temperature features computed from the '30mer' column of data.

    Four values are produced per row, all with Tm.Tm_NN and the DNA_NN3 table:
        0-the whole 30-mer ("global Tm")
        1-the slice segments[0]; default (19, 24), i.e. the 5 nts immediately
          proximal of the NGG PAM (sgRNA positions 16 - 20)
        2-the slice segments[1]; default (11, 19), i.e. sgRNA positions 8 - 15
        3-the slice segments[2]; default (6, 11), i.e. sgRNA positions 3 - 7

    data          -- DataFrame with a '30mer' column of sequence strings
    pam_audit     -- when True, raise if characters 25:27 of a sequence are not "GG"
    learn_options -- optional dict; 'Tm segments' overrides the default slices

    Returns a DataFrame indexed like data with four float columns.
    '''

    if learn_options is None or 'Tm segments' not in learn_options:
        segments = [(19, 24), (11, 19), (6, 11)]
    else:
        segments = learn_options['Tm segments']

    # Only used to build the column names. Assigned before the loop so that an
    # empty input still yields an (empty) frame instead of an UnboundLocalError.
    rna = False

    sequence = data['30mer'].values
    featarray = np.ones((sequence.shape[0], 4))

    for i, seq in enumerate(sequence):
        if pam_audit and seq[25:27] != "GG":
            raise Exception("expected GG but found %s" % seq[25:27])
        featarray[i, 0] = Tm.Tm_NN(seq, nn_table=Tm.DNA_NN3)  # 30mer Tm
        featarray[i, 1] = Tm.Tm_NN(seq[segments[0][0]:segments[0][1]], nn_table=Tm.DNA_NN3)  # 5nts immediately proximal of the NGG PAM
        featarray[i, 2] = Tm.Tm_NN(seq[segments[1][0]:segments[1][1]], nn_table=Tm.DNA_NN3)  # 8-mer
        featarray[i, 3] = Tm.Tm_NN(seq[segments[2][0]:segments[2][1]], nn_table=Tm.DNA_NN3)  # 5-mer

    feat = pandas.DataFrame(featarray, index=data.index, columns=["Tm global_%s" % rna, "5mer_end_%s" % rna, "8mer_middle_%s" % rna, "5mer_start_%s" % rna])

    return feat
|
| 389 |
+
|
| 390 |
+
def gc_features(data, audit=True):
    '''
    GC-count features for the '30mer' column of data.

    Returns three Series, in this order: a 0/1 indicator 'GC > 10', a 0/1
    indicator 'GC < 10', and the raw 'GC count' produced by countGC.
    '''
    gc_count = data['30mer'].apply(countGC, args=(audit,))
    gc_count.name = 'GC count'

    # multiplying the boolean masks by 1 turns them into integer indicators
    gc_above_10 = (gc_count > 10) * 1
    gc_below_10 = (gc_count < 10) * 1
    gc_above_10.name = 'GC > 10'
    gc_below_10.name = 'GC < 10'

    return gc_above_10, gc_below_10, gc_count
|
| 398 |
+
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
def normalize_features(data,axis):
    '''
    Mean-center and scale to unit variance along `axis`.

    input: pandas.DataFrame of dtype=np.float64
    The frame passed in is modified in place by the centering and scaling
    steps. Columns that contain NaN afterwards (for example constant columns,
    whose standard deviation is 0) are removed from the returned frame.

    Returns the normalized DataFrame without the NaN columns.
    '''
    data -= data.mean(axis)
    data /= data.std(axis)
    # remove columns with NaNs; `axis` must be passed by keyword because
    # pandas 2.x no longer accepts it positionally
    data = data.dropna(axis=1)
    if np.any(np.isnan(data.values)): raise Exception("found NaN in normalized features")
    return data
|
| 412 |
+
|
| 413 |
+
def apply_nucleotide_features(seq_data_frame, order, num_proc, include_pos_independent, max_index_to_use, prefix=""):
    '''
    Apply nucleotide_features to every element of seq_data_frame.

    Returns the position-dependent features, or the pair
    (position-dependent, position-independent) when include_pos_independent
    is true. num_proc is accepted but not used.
    Raises AssertionError if any resulting feature is NaN.
    '''
    feat_pd = seq_data_frame.apply(nucleotide_features, args=(order, max_index_to_use, prefix, 'pos_dependent'))

    if not include_pos_independent:
        assert not np.any(np.isnan(feat_pd)), "found nan in feat_pd"
        return feat_pd

    feat_pi = seq_data_frame.apply(nucleotide_features, args=(order, max_index_to_use, prefix, 'pos_independent'))
    assert not np.any(np.isnan(feat_pd)), "nans here can arise from sequences of different lengths"
    assert not np.any(np.isnan(feat_pi)), "nans here can arise from sequences of different lengths"
    return feat_pd, feat_pi
|
| 426 |
+
|
| 427 |
+
def get_alphabet(order, raw_alphabet = ['A', 'T', 'C', 'G']):
    '''
    All strings of length `order` over raw_alphabet, in itertools.product order.
    '''
    return list(map("".join, itertools.product(raw_alphabet, repeat=order)))
|
| 430 |
+
|
| 431 |
+
def nucleotide_features(s, order, max_index_to_use, prefix="", feature_type='all', raw_alphabet = ['A', 'T', 'C', 'G']):
    '''
    compute position-specific order-mer features for the 4-letter alphabet
    (e.g. for a sequence of length 30, there are 30*4 single nucleotide features
    and (30-1)*4^2=464 double nucleotide features

    s                -- the sequence string
    order            -- length of the k-mers to featurize
    max_index_to_use -- prefix length to keep, or None to keep all of s
    prefix           -- string prepended to every feature label
    feature_type     -- 'pos_dependent', 'pos_independent' or 'all'
    raw_alphabet     -- letters the k-mers are built from (never modified here)

    Returns a pandas.Series of one-hot position-dependent features, a Series
    of k-mer counts (position independent), or the tuple
    (position-dependent, position-independent) for feature_type='all'.
    Raises ValueError (from list.index) when s contains a k-mer that is not in
    the alphabet.
    '''
    assert feature_type in ['all', 'pos_independent', 'pos_dependent']

    # The None check has to come first: comparing None with an int raises
    # TypeError on Python 3.
    if max_index_to_use is not None:
        # NOTE(review): as in the original, a limit at or below len(s) is
        # raised to len(s), so the slice below never actually shortens s.
        # Kept unchanged because the feature dimensions depend on it -- confirm
        # whether truncation was intended.
        if max_index_to_use <= len(s):
            max_index_to_use = len(s)
        s = s[:max_index_to_use]

    alphabet = get_alphabet(order, raw_alphabet = raw_alphabet)
    features_pos_dependent = np.zeros(len(alphabet)*(len(s)-(order-1)))
    features_pos_independent = np.zeros(np.power(len(raw_alphabet),order))

    # labels, laid out exactly like the two feature vectors
    index_dependent = ['%s%s_%d' % (prefix, l, position)
                       for position in range(0, len(s)-order+1, 1)
                       for l in alphabet]
    index_independent = ['%s%s' % (prefix, l) for l in alphabet]

    for position in range(0, len(s)-order+1, 1):
        nucl = s[position:position+order]
        k = alphabet.index(nucl)
        features_pos_dependent[k + (position*len(alphabet))] = 1.0
        features_pos_independent[k] += 1.0

        # this is to check that the labels in the pd df actually match the nucl and position
        assert index_dependent[k + (position*len(alphabet))] == '%s%s_%d' % (prefix, nucl, position)
        assert index_independent[k] == '%s%s' % (prefix, nucl)

    if np.any(np.isnan(features_pos_dependent)):
        raise Exception("found nan features in features_pos_dependent")
    if np.any(np.isnan(features_pos_independent)):
        raise Exception("found nan features in features_pos_independent")

    if feature_type == 'all':
        # The result is a tuple of two Series, so each one is checked on its
        # own (the original called `.values` on the tuple, which always failed).
        res = pandas.Series(features_pos_dependent,index=index_dependent), pandas.Series(features_pos_independent,index=index_independent)
        assert not any(np.any(np.isnan(part.values)) for part in res)
        return res

    if feature_type == 'pos_independent':
        res = pandas.Series(features_pos_independent, index=index_independent)
        assert not np.any(np.isnan(res.values))
        return res

    res = pandas.Series(features_pos_dependent, index=index_dependent)
    assert not np.any(np.isnan(res.values))
    return res
|
| 492 |
+
|
| 493 |
+
def nucleotide_features_dictionary(prefix=''):
    '''
    Map generic feature ids to readable names for k-mer orders 1, 2 and 3 over
    a 30-position sequence.

    Keys look like '<prefix>_pd.Order<k>_P<i>' (position dependent) and
    '<prefix>_pi.Order<k>_P<i>' (position independent); values look like
    '<kmer>_<position name>' and '<kmer>' respectively.
    '''
    # names of the 30 positions: 4 upstream, 20 guide positions, N G G, 3 downstream
    seqname = ['-4', '-3', '-2', '-1'] + [str(i) for i in range(1, 21)] + ['N', 'G', 'G', '+1', '+2', '+3']

    sequence = 30
    raw_alphabet = ['A', 'T', 'C', 'G']

    index_dependent = []
    index_independent = []
    feature_names_dep = []
    feature_names_indep = []

    for order in (1, 2, 3):
        alphabet = ["".join(i) for i in itertools.product(raw_alphabet, repeat=order)]
        n_windows = sequence - (order - 1)
        n_dependent = len(alphabet) * n_windows
        n_independent = len(raw_alphabet) ** order

        index_dependent.extend('%s_pd.Order%d_P%d' % (prefix, order, i) for i in range(n_dependent))
        index_independent.extend('%s_pi.Order%d_P%d' % (prefix, order, i) for i in range(n_independent))

        feature_names_dep.extend('%s_%s' % (letter, seqname[pos])
                                 for pos in range(n_windows)
                                 for letter in alphabet)
        feature_names_indep.extend('%s' % letter for letter in alphabet)

    assert len(feature_names_indep) == len(index_independent)
    assert len(feature_names_dep) == len(index_dependent)

    return dict(zip(index_dependent + index_independent,
                    feature_names_dep + feature_names_indep))
|
| 528 |
+
|
| 529 |
+
def normalize_feature_sets(feature_sets):
    '''
    zero-mean, unit-variance each feature within each set

    Returns a new dict with the same keys whose values are the results of
    normalize_features(..., axis=0). Raises if a normalized set contains NaN
    or ends up with no columns.
    '''

    print("Normalizing features...")
    started = time.time()

    new_feature_sets = {}
    for name, frame in feature_sets.items():
        normalized = normalize_features(frame, axis=0)
        new_feature_sets[name] = normalized
        if np.any(np.isnan(normalized.values)):
            raise Exception("found Nan feature values in set=%s" % name)
        assert normalized.shape[1] > 0, "0 columns of features"

    finished = time.time()
    print("\t\tElapsed time for normalizing features is %.2f seconds" % (finished-started))

    return new_feature_sets
|
|
@@ -0,0 +1,486 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas
|
| 2 |
+
import pkg_resources
|
| 3 |
+
|
| 4 |
+
from . import util
|
| 5 |
+
# import matplotlib.pyplot as plt
|
| 6 |
+
import scipy as sp
|
| 7 |
+
import scipy.stats
|
| 8 |
+
import numpy as np
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
cur_dir = os.path.dirname(os.path.abspath(__file__))
|
| 12 |
+
|
| 13 |
+
def from_custom_file(data_file, learn_options):
    '''
    Load prediction inputs from a CSV file, using the semantics of V2 data.

    The file must provide the columns '30mer', 'Target gene',
    'Percent Peptide' and 'Amino Acid Cut position'. learn_options is passed
    through set_V2_target_names.

    Returns (Xdf, Y, gene_position, target_genes); Y is always None here.
    '''
    # use semantics of when we load V2 data
    print("Loading inputs to predict from %s" % data_file)
    data = pandas.read_csv(data_file)

    mandatory_columns = ['30mer', 'Target gene', 'Percent Peptide', 'Amino Acid Cut position']
    for required in mandatory_columns:
        assert required in data.columns, "inputs for prediction must include these columns: %s" % mandatory_columns

    Xdf = pandas.DataFrame(data)
    # set_index consumes '30mer', so park a copy and restore it as a column afterwards
    Xdf['30mercopy'] = Xdf['30mer']
    Xdf = Xdf.set_index(['30mer', 'Target gene'])
    Xdf['30mer'] = Xdf['30mercopy']
    Xdf.index.names = ['Sequence', 'Target']

    # one placeholder drug per row becomes a third index level
    n_rows = Xdf.shape[0]
    Xdf['drug'] = ['dummydrug%s' % i for i in range(n_rows)]
    Xdf = Xdf.set_index('drug', append=True)

    Y = None
    gene_position = Xdf[['Percent Peptide', 'Amino Acid Cut position']]
    target_genes = np.unique(Xdf.index.levels[1])

    learn_options = set_V2_target_names(learn_options)

    return Xdf, Y, gene_position, target_genes
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def from_file(data_file, learn_options, data_file2=None, data_file3=None):
    '''
    Load training data according to the data version learn_options["V"].

    V == 1: read_V1_data(data_file)              (requires learn_options["weighted"] to be None)
    V == 2: read_V2_data(data_file)
    V == 3: mergeV1_V2(data_file, data_file2)
    V == 4: merge_all(data_file, data_file2, data_file3)
    V == 5: read_xu_et_al(data_file3)

    The target-name entries of learn_options ('binary target name',
    'rank-transformed target name', 'raw target name') are set in place.
    The '30mer' column of the result is cut down to its first 30 characters.

    Returns (Xdf, Y, gene_position, target_genes).
    Raises ValueError for any other value of learn_options["V"].
    '''
    version = learn_options["V"]

    if version == 1:  # from Nature Biotech paper

        print("loading V%d data" % version)

        # same condition as the original `not ... is not None`
        assert learn_options["weighted"] is None, "not supported for V1 data"
        annotations, gene_position, target_genes, Xdf, Y = read_V1_data(data_file, learn_options)

        learn_options['binary target name'] = 'average threshold'
        learn_options['rank-transformed target name'] = 'average rank'
        learn_options['raw target name'] = 'average activity'

    elif version == 2:  # from Nov 2014, hot off the machines
        Xdf, drugs_to_genes, target_genes, Y, gene_position = read_V2_data(data_file, learn_options)

        # check that data is consistent with sgRNA score
        xx = Xdf['sgRNA Score'].values
        yy = Y['score_drug_gene_rank'].values
        rr, pp = sp.stats.pearsonr(xx, yy)
        assert rr > 0, "data processing has gone wrong as correlation with previous predictions is negative"

        learn_options = set_V2_target_names(learn_options)

    elif version == 3:  # merge of V1 and V2--this is what is used for the final model
        # these are relative to the V2 data, and V1 will be made to automatically match
        learn_options['binary target name'] = 'score_drug_gene_threshold'
        learn_options['rank-transformed target name'] = 'score_drug_gene_rank'
        learn_options['raw target name'] = None

        Xdf, Y, gene_position, target_genes = mergeV1_V2(data_file, data_file2, learn_options)

    elif version == 4:  # merge of V1 and V2 and the Xu et al data
        # these are relative to the V2 data, and V1 and Xu et al. will be made to automatically match
        learn_options['binary target name'] = 'score_drug_gene_threshold'
        learn_options['rank-transformed target name'] = 'score_drug_gene_rank'
        learn_options['raw target name'] = None

        Xdf, Y, gene_position, target_genes = merge_all(data_file, data_file2, data_file3, learn_options)

    elif version == 5:
        learn_options['binary target name'] = 'score_drug_gene_threshold'
        learn_options['rank-transformed target name'] = 'score_drug_gene_rank'
        learn_options['raw target name'] = None

        gene_position, target_genes, Xdf, Y = read_xu_et_al(data_file3)

    else:
        # previously fell through to an UnboundLocalError on Xdf below
        raise ValueError("unsupported data version V=%s (expected 1, 2, 3, 4 or 5)" % version)

    # truncate down to 30--some data sets gave us more.
    Xdf["30mer"] = Xdf["30mer"].apply(lambda x: x[0:30])

    return Xdf, Y, gene_position, target_genes
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def set_V2_target_names(learn_options):
    '''
    Fill in the V2 target column names on learn_options (modified in place).

    'binary target name' and 'rank-transformed target name' are only set when
    missing; 'raw target name' is always overwritten with 'score'.
    Returns the same learn_options object.
    '''
    learn_options.setdefault('binary target name', 'score_drug_gene_threshold')
    learn_options.setdefault('rank-transformed target name', 'score_drug_gene_rank')
    learn_options['raw target name'] = 'score'
    return learn_options
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def combine_organisms(human_data, mouse_data):
    '''
    Stack the human (CD13, CD33, CD15) and mouse measurements into one X and
    one Y frame via util.get_data.

    Both inputs are frames with a 'Target' index level. Returns (X, Y) with
    rows ordered CD13, CD15, CD33, then the mouse targets.
    '''
    # 'Target' is the index level name, 'CD13' etc. are values in that level;
    # xs slices through the pandas data frame to return another one, and the
    # y_names are the measurement columns handed to util.get_data
    human_targets = [
        ('CD13', ['NB4 CD13', 'TF1 CD13']),
        ('CD33', ['MOLM13 CD33', 'TF1 CD33', 'NB4 CD33']),
        ('CD15', ['MOLM13 CD15']),
    ]
    human_X = {}
    human_Y = {}
    for target, y_names in human_targets:
        subset = human_data.xs(target, level='Target', drop_level=False)
        human_X[target], human_Y[target] = util.get_data(subset, y_names=y_names)

    mouse_X = pandas.DataFrame()
    mouse_Y = pandas.DataFrame()
    for k in mouse_data.index.levels[1]:
        # k is one value of the second index level, passed on as target_gene
        per_gene = mouse_data.xs(k, level='Target', drop_level=False)
        X, Y = util.get_data(per_gene, ["On-target Gene"], target_gene=k, organism='mouse')
        mouse_X = pandas.concat([mouse_X, X], axis=0)
        mouse_Y = pandas.concat([mouse_Y, Y], axis=0)

    X = pandas.concat([human_X['CD13'], human_X['CD15'], human_X['CD33'], mouse_X], axis=0)
    Y = pandas.concat([human_Y['CD13'], human_Y['CD15'], human_Y['CD33'], mouse_Y], axis=0)

    return X, Y
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def read_V1_data(data_file, learn_options, AML_file=None):
    '''
    Read the V1 data set (human and mouse sheets) plus its gene-position
    annotations.

    data_file and AML_file default to the packaged "data/V1_data.xlsx" and
    "data/V1_suppl_data.txt". Only rows present in both the measurements and
    the annotations are kept.

    Returns (annotations, gene_position, target_genes, Xdf, Y).
    '''
    if data_file is None:
        data_file = pkg_resources.resource_filename(__name__, "data/V1_data.xlsx")
    if AML_file is None:
        AML_file = pkg_resources.resource_filename(__name__, "data/V1_suppl_data.txt")

    # sheet 0 holds the human data, sheet 1 the mouse data
    human_data, mouse_data = (
        pandas.read_excel(data_file, sheet_name=sheet, index_col=[0, 1])
        for sheet in (0, 1)
    )
    Xdf, Y = combine_organisms(human_data, mouse_data)

    # get position within each gene, then join and re-order
    # note that 11 missing guides we were told to ignore
    annotations = pandas.read_csv(AML_file, delimiter='\t', index_col=[0, 4])
    annotations.index.names = Xdf.index.names
    merged = pandas.merge(Xdf, annotations, how="inner", left_index=True, right_index=True)
    merged = util.impute_gene_position(merged)
    gene_position = merged[['Amino Acid Cut position', 'Nucleotide cut position', 'Percent Peptide']]

    # restrict and re-order both frames to the rows that survived the inner join
    kept_rows = gene_position.index
    Y = Y.loc[kept_rows]
    Xdf = Xdf.loc[kept_rows]

    Y['test'] = 1  # for bookkeeping, to keep consistent with V2 which uses this for "extra pairs"

    target_genes = Y['Target gene'].unique()

    Y.index.names = ['Sequence', 'Target gene']

    assert Xdf.index.equals(Y.index), "The index of Xdf is different from the index of Y (this can cause inconsistencies/random performance later on)"

    flip_target = learn_options is not None and learn_options["flipV1target"]
    if flip_target:
        banner = "************************************************************************"
        print(banner)
        print("*****************MATCHING DOENCH CODE (DEBUG MODE)**********************")
        print(banner)
        # normally it is: Y['average threshold'] = Y['average rank'] > 0.8, where 1s are good guides, 0s are not
        Y['average threshold'] = Y['average rank'] < 0.2  # 1s are bad guides
        print("press c to continue")
        # NOTE(review): this drops into an interactive ipdb session and needs
        # the third-party ipdb package -- confirm this debug path is still wanted.
        import ipdb
        ipdb.set_trace()

    return annotations, gene_position, target_genes, Xdf, Y
|
| 170 |
+
|
| 171 |
+
def rank_transform(x):
    '''
    Map values to 1 - rank/max(rank), so the largest value gets 0.
    '''
    ranks = sp.stats.mstats.rankdata(x)
    return 1.0 - ranks/ranks.max()
|
| 173 |
+
|
| 174 |
+
def read_xu_et_al(data_file, learn_options=None, verbose=True, subsetting='ours'):
    '''
    Read the Xu et al. data set from an Excel workbook.

    For each of the 'ribo', 'non_ribo' and 'mESC' data sets the
    '<name>_efficient_sgRNA' and '<name>_inefficient_sgRNA' sheets are read,
    labelled with threshold 1. / 0., and rank-transformed per gene for the
    KBM7 and HL60 log2 fold changes.

    data_file  -- workbook path; None falls back to '../data/xu_et_al_data.xlsx'
    subsetting -- 'ours' trims each sequence with [6:-4], anything else with [10:]
    learn_options and verbose are accepted but not used.

    Returns (gene_position, target_genes, X, Y):
        gene_position -- 'Nucleotide cut position', 'Percent Peptide', 'Amino Acid Cut position'
        target_genes  -- unique target gene names
        X             -- '30mer' and 'Strand'
        Y             -- 'score_drug_gene_rank', 'score_drug_gene_threshold', 'test', 'Target gene'
    '''
    if data_file is None:
        data_file = '../data/xu_et_al_data.xlsx'

    datasets = ['ribo', 'non_ribo', 'mESC']
    per_dataset = []

    for d in datasets:
        data_efficient = pandas.read_excel(data_file, sheet_name='%s_efficient_sgRNA' % d, skiprows=2)
        data_inefficient = pandas.read_excel(data_file, sheet_name='%s_inefficient_sgRNA' % d, skiprows=2)

        data_efficient['threshold'] = 1.
        data_inefficient['threshold'] = 0.

        exp_data = pandas.concat((data_efficient, data_inefficient))
        exp_data['rank_KBM7'] = exp_data.groupby('Gene Symbol')['log2 fold change, KBM7'].transform(rank_transform)
        exp_data['rank_HL60'] = exp_data.groupby('Gene Symbol')['log2 fold change, HL60'].transform(rank_transform)

        per_dataset.append(exp_data)

    aggregated = pandas.concat(per_dataset)

    seq_col = "sequence(target+3'+5')"

    # go from 40mer to 30mer
    if subsetting == 'ours':
        aggregated[seq_col] = aggregated[seq_col].apply(lambda x: x[6:-4])
    else:
        aggregated[seq_col] = aggregated[seq_col].apply(lambda x: x[10:])

    # make sure EVERYTHING is uppercase
    aggregated[seq_col] = aggregated[seq_col].apply(lambda x: x.upper())

    # rename columns
    aggregated = aggregated.rename(columns={seq_col: '30mer', 'Gene Symbol': 'Target gene', 'strand': 'Strand'})

    # Assign the recoded column back as a whole. The original used chained
    # assignment (aggregated['Strand'].loc[mask] = ...), which may write to a
    # temporary copy and does nothing under pandas copy-on-write.
    aggregated['Strand'] = aggregated['Strand'].replace({'+': 'sense', '-': 'antisense'})

    aggregated['average rank'] = aggregated[['rank_HL60', 'rank_KBM7']].mean(axis=1)

    df = aggregated.rename(columns={'30mer': 'Sequence', 'Target gene': 'Target'})
    df['drug'] = 'nodrug'
    df['test'] = 1
    df = df.set_index(['Sequence', 'Target', 'drug'])
    # bring sequence and gene back as ordinary columns next to the index
    df['30mer'] = df.index.get_level_values(0)
    df['Target gene'] = df.index.get_level_values(1)
    df['Organism'] = 'unknown'
    df['score_drug_gene_rank'] = df['average rank']
    df['score_drug_gene_threshold'] = df['threshold']
    df['Nucleotide cut position'] = df['start of target']
    df['Percent Peptide'] = 0
    df['Amino Acid Cut position'] = 0
    target_genes = np.unique(df['Target gene'].values)

    return df[['Nucleotide cut position', 'Percent Peptide', 'Amino Acid Cut position']], target_genes, df[['30mer', 'Strand']], df[['score_drug_gene_rank', 'score_drug_gene_threshold', 'test', 'Target gene']]
|
| 230 |
+
|
| 231 |
+
def read_V2_data(data_file, learn_options=None, verbose=True):
    """
    Load the V2 drug-screen workbook and build the feature/target frames.

    Reads the "ResultsFiltered" sheet of ``data_file`` (the packaged
    ``data/V2_data.xlsx`` when ``data_file`` is None), gathers the rows of
    every (drug, gene) pair listed in ``drugs_to_genes`` and derives rank,
    threshold and quantized targets through ``util.get_ranks`` -- once per
    (gene, drug) pair and once per drug.

    Args:
        data_file: path of the Excel workbook, or None for the packaged file.
        learn_options: optional dict. When given, 'extra pairs' and
            'all pairs' (mutually exclusive) add gene/drug pairs, and
            ``learn_options["weighted"] == "variance"`` merges a per-row
            replicate variance column into Y.
        verbose: print one progress line per gene loaded.

    Returns:
        Tuple ``(Xdf, drugs_to_genes, target_genes, Y, gene_position)``.

    Raises:
        AssertionError: if both pair options are active, or if the indices
            of Xdf and Y differ at the end.
    """
    if data_file is None:
        data_file = pkg_resources.resource_filename(__name__, "data/V2_data.xlsx")

    data = pandas.read_excel(data_file, sheet_name="ResultsFiltered", skiprows=list(range(0, 6+1)), index_col=[0, 4])
    # grab data relevant to each of three drugs, which excludes some genes
    # note gene MED12 has two drugs, all others have at most one
    Xdf = pandas.DataFrame()

    # This comes from the "Pairs" tab in their excel sheet,
    # note HPRT/HPRT1 are same thing, and also PLX_2uM/PLcX_2uM
    known_pairs = {'AZD_200nM': ['CCDC101', 'MED12', 'TADA2B', 'TADA1'],
                   '6TG_2ug/mL': ['HPRT1'],
                   'PLX_2uM': ['CUL3', 'NF1', 'NF2', 'MED12']}

    # Starts as an independent copy of known_pairs; may be extended below
    # without touching the known pairs used for the 'test' flag.
    drugs_to_genes = {drug: list(genes) for drug, genes in known_pairs.items()}

    if learn_options is not None:
        assert not (learn_options['extra pairs'] and learn_options['all pairs']), "extra pairs and all pairs options (in learn_options) can't be active simultaneously."

        if learn_options['extra pairs']:
            drugs_to_genes['AZD_200nM'].extend(['CUL3', 'NF1', 'NF2'])
        elif learn_options['all pairs']:
            drugs_to_genes['AZD_200nM'].extend(['HPRT1', 'CUL3', 'NF1', 'NF2'])
            drugs_to_genes['PLX_2uM'].extend(['HPRT1', 'CCDC101', 'TADA2B', 'TADA1'])
            drugs_to_genes['6TG_2ug/mL'].extend(['CCDC101', 'MED12', 'TADA2B', 'TADA1', 'CUL3', 'NF1', 'NF2'])

    count = 0
    for drug, genes in drugs_to_genes.items():
        for g in genes:
            # Copy only the selected rows (not the whole sheet) so the column
            # assignments below act on an independent frame.
            Xtmp = data.xs(g, level='Target gene', drop_level=False).copy()
            Xtmp['drug'] = drug
            Xtmp['score'] = Xtmp[drug].copy()  # grab the drug results that are relevant for this gene

            if g in known_pairs[drug]:
                Xtmp['test'] = 1.
            else:
                Xtmp['test'] = 0.

            count = count + Xtmp.shape[0]
            Xdf = pandas.concat([Xdf, Xtmp], axis=0)
            if verbose:
                print("Loaded %d samples for gene %s \ttotal number of samples: %d" % (Xtmp.shape[0], g, count))

    # create new index that includes the drug
    Xdf = Xdf.set_index('drug', append=True)

    Y = pandas.DataFrame(Xdf.pop("score"))
    Y.columns.names = ["score"]

    test_gene = pandas.DataFrame(Xdf.pop('test'))
    target = pandas.DataFrame(Xdf.index.get_level_values('Target gene').values, index=Y.index, columns=["Target gene"])
    Y = pandas.concat((Y, target, test_gene), axis=1)
    target_genes = Y['Target gene'].unique()
    gene_position = Xdf[["Percent Peptide", "Amino Acid Cut position"]].copy()

    # convert to ranks for each (gene, drug combo)
    y_rank = pandas.DataFrame()
    y_threshold = pandas.DataFrame()
    y_quant = pandas.DataFrame()
    for drug, gene_list in drugs_to_genes.items():
        for gene in gene_list:
            ytmp = pandas.DataFrame(Y.xs((gene, drug), level=["Target gene", "drug"], drop_level=False)['score'])
            y_ranktmp, y_rank_raw, y_thresholdtmp, y_quanttmp = util.get_ranks(ytmp, thresh=0.8, prefix="score_drug_gene", flip=False)
            y_rank = pandas.concat((y_rank, y_ranktmp), axis=0)
            y_threshold = pandas.concat((y_threshold, y_thresholdtmp), axis=0)
            y_quant = pandas.concat((y_quant, y_quanttmp), axis=0)

    yall = pandas.concat((y_rank, y_threshold, y_quant), axis=1)
    Y = pandas.merge(Y, yall, how='inner', left_index=True, right_index=True)

    # convert also by drug only, irrespective of gene
    y_rank = pandas.DataFrame()
    y_threshold = pandas.DataFrame()
    y_quant = pandas.DataFrame()
    for drug in drugs_to_genes:
        ytmp = pandas.DataFrame(Y.xs(drug, level="drug", drop_level=False)['score'])
        y_ranktmp, y_rank_raw, y_thresholdtmp, y_quanttmp = util.get_ranks(ytmp, thresh=0.8, prefix="score_drug", flip=False)
        y_rank = pandas.concat((y_rank, y_ranktmp), axis=0)
        y_threshold = pandas.concat((y_threshold, y_thresholdtmp), axis=0)
        y_quant = pandas.concat((y_quant, y_quanttmp), axis=0)

    yall = pandas.concat((y_rank, y_threshold, y_quant), axis=1)
    Y = pandas.merge(Y, yall, how='inner', left_index=True, right_index=True)

    gene_position = util.impute_gene_position(gene_position)

    if learn_options is not None and learn_options["weighted"] == "variance":
        print("computing weights from replicate variance...")
        # compute the variance across replicates so can use it as a weight
        data = pandas.read_excel(data_file, sheet_name="Normalized", skiprows=list(range(0, 6+1)), index_col=[0, 4])
        data.index.names = ["Sequence", "Target gene"]

        # NOTE(review): 'Deep 29 ' carries a trailing space; presumably it
        # mirrors the spreadsheet's column header -- confirm before "fixing".
        experiments = {
            'AZD_200nM': ['Deep 25', 'Deep 27', 'Deep 29 ', 'Deep 31'],
            '6TG_2ug/mL': ['Deep 33', 'Deep 35', 'Deep 37', 'Deep 39'],
            'PLX_2uM': ['Deep 49', 'Deep 51', 'Deep 53', 'Deep 55'],
        }

        variance = None
        for drug in drugs_to_genes:
            gene_mask = data.index.get_level_values('Target gene').isin(drugs_to_genes[drug])
            # .copy() so that adding columns does not write into a slice of `data`
            data_tmp = data.iloc[gene_mask][experiments[drug]].copy()
            data_tmp["drug"] = drug
            data_tmp = data_tmp.set_index('drug', append=True)
            data_tmp["variance"] = np.var(data_tmp.values, axis=1)
            if variance is None:
                variance = data_tmp["variance"].copy()
            else:
                variance = pandas.concat((variance, data_tmp["variance"]), axis=0)

        orig_index = Y.index.copy()
        Y = pandas.merge(Y, pandas.DataFrame(variance), how="inner", left_index=True, right_index=True)
        # restore the original row order; `.ix` was removed in pandas 1.0,
        # `.loc` is the label-based replacement
        Y = Y.loc[orig_index]
        print("done.")

    # Make sure to keep this check last in this function
    assert Xdf.index.equals(Y.index), "The index of Xdf is different from the index of Y (this can cause inconsistencies/random performance later on)"

    return Xdf, drugs_to_genes, target_genes, Y, gene_position
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
def merge_all(data_file=None, data_file2=None, data_file3=None, learn_options=None):
    """
    Stack the merged V1+V2 data with the Xu et al. data.

    ``data_file`` and ``data_file2`` are forwarded to ``mergeV1_V2`` and
    ``data_file3`` to ``read_xu_et_al``; ``learn_options`` goes to both.

    Returns:
        Tuple ``(Xdf, Y, gene_position, target_genes)`` where each element is
        the V1+V2 result followed by the Xu et al. result.
    """
    merged_X, merged_Y, merged_position, merged_genes = mergeV1_V2(data_file, data_file2, learn_options)
    xu_position, xu_genes, xu_X, xu_Y = read_xu_et_al(data_file3, learn_options)

    return (
        pandas.concat((merged_X, xu_X)),
        pandas.concat((merged_Y, xu_Y)),
        pandas.concat((merged_position, xu_position)),
        np.concatenate((merged_genes, xu_genes)),
    )
|
| 384 |
+
|
| 385 |
+
def mergeV1_V2(data_file, data_file2, learn_options):
    '''
    Concatenate the V1 and V2 data sets.

    ground_truth_label, etc. are taken to correspond to the V2 data, and the
    V1 columns are renamed to match them. V1 rows receive a dummy "nodrug"
    index level so that both sets share the same index layout.

    Returns (Xdf, Y, gene_position, target_genes).
    '''
    assert not learn_options['include_strand'], "don't currently have 'Strand' column in V1 data"

    _annotations, position_v1, genes_v1, X_v1, Y_v1 = read_V1_data(data_file, learn_options)
    X_v2, _drugs_to_genes, genes_v2, Y_v2, position_v2 = read_V2_data(data_file2)

    # map the V1 target columns onto the names configured for V2
    Y_v1 = Y_v1.rename(columns={'average rank': learn_options["rank-transformed target name"]})
    Y_v1 = Y_v1.rename(columns={'average threshold': learn_options["binary target name"]})

    # add a dummy "drug" to V1 so can join the data sets
    Y_v1["drug"] = "nodrug"
    Y_v1 = Y_v1.set_index('drug', append=True)
    Y_v1.index.names = ['Sequence', 'Target gene', 'drug']

    y_columns = np.unique(['Target gene', 'test', 'score_drug_gene_rank', 'score_drug_gene_threshold'])
    Y_v1 = Y_v1[y_columns]
    Y_v2 = Y_v2[y_columns]

    X_v1["drug"] = "nodrug"
    X_v1 = X_v1.set_index('drug', append=True)

    x_columns = ['30mer', 'Strand']
    X_v1 = X_v1[x_columns]
    X_v2 = X_v2[x_columns]

    position_v1["drug"] = "nodrug"
    position_v1 = position_v1.set_index('drug', append=True)
    position_v1.index.names = ['Sequence', 'Target gene', 'drug']
    position_columns = ['Percent Peptide', 'Amino Acid Cut position']
    position_v1 = position_v1[position_columns]
    position_v2 = position_v2[position_columns]

    Y = pandas.concat((Y_v1, Y_v2), axis=0)
    Xdf = pandas.concat((X_v1, X_v2), axis=0)
    gene_position = pandas.concat((position_v1, position_v2))

    target_genes = np.concatenate((genes_v1, genes_v2))

    # Debug export, disabled by default (writes to a hard-coded local path).
    save_to_file = False

    if save_to_file:
        Y.index.names = ['Sequence', 'Target', 'drug']
        assert np.all(Xdf.index.values == Y.index.values), "rows don't match up"

        first_dup = np.where(Y.index.duplicated())[0][0]
        same_sequence = np.where(Y.index.get_level_values(0).values == Y.index[first_dup][0])[0]

        # arbitrarily set one of these to have "nodrug2" as the third level index
        # so that they are not repeated, and the joins therefore do not augment the data set
        assert len(same_sequence) == 2, "expected only duplicates"
        newindex = Y.index.tolist()
        dup_entry = newindex[first_dup]
        newindex[first_dup] = (dup_entry[0], dup_entry[1], "nodrug2")
        Y.index = pandas.MultiIndex.from_tuples(newindex, names=Y.index.names)
        Xdf.index = pandas.MultiIndex.from_tuples(newindex, names=Y.index.names)

        # there seems to be a duplicate index, and thus this increases the data set size, so doing it the hacky way...
        XandY = pandas.merge(Xdf, Y, how="inner", left_index=True, right_index=True)
        position_export = gene_position.copy()
        position_export.index.names = ['Sequence', 'Target', 'drug']
        position_export.index = pandas.MultiIndex.from_tuples(newindex, names=Y.index.names)
        XandY = pandas.merge(XandY, position_export, how="inner", left_index=True, right_index=True)

        # truncate to 30mers
        XandY["30mer"] = XandY["30mer"].apply(lambda seq: seq[0:30])
        XandY.to_csv(r'D:\Source\CRISPR\data\tmp\V3.csv')

    return Xdf, Y, gene_position, target_genes
|
| 458 |
+
|
| 459 |
+
|
| 460 |
+
def get_V1_genes(data_file=None):
    """Return the target genes that ``read_V1_data`` reports for ``data_file``."""
    _annotations, _position, genes, _X, _Y = read_V1_data(data_file, learn_options=None)
    return genes
|
| 463 |
+
|
| 464 |
+
|
| 465 |
+
def get_V2_genes(data_file=None):
    """Return the target genes that ``read_V2_data`` reports, loading quietly."""
    _X, _pairs, genes, _Y, _position = read_V2_data(data_file, verbose=False)
    return genes
|
| 468 |
+
|
| 469 |
+
|
| 470 |
+
def get_V3_genes(data_fileV1=None, data_fileV2=None):
    """Return the V1 target genes followed by the V2 target genes as one array."""
    v1_genes = get_V1_genes(data_fileV1)
    v2_genes = get_V2_genes(data_fileV2)
    return np.concatenate((v1_genes, v2_genes))
|
| 473 |
+
|
| 474 |
+
def get_xu_genes(data_file=None):
    """Return the second element (the target genes) of ``read_xu_et_al``'s result."""
    xu_result = read_xu_et_al(data_file)
    return xu_result[1]
|
| 476 |
+
|
| 477 |
+
def get_mouse_genes(data_file=None):
    """Return the unique 'Target gene' values of V1 rows whose 'Organism' is 'mouse'."""
    _annotations, _position, _genes, v1_frame, _Y = read_V1_data(data_file, learn_options=None)
    is_mouse = v1_frame['Organism'] == 'mouse'
    return v1_frame[is_mouse]['Target gene'].unique()
|
| 480 |
+
|
| 481 |
+
|
| 482 |
+
def get_human_genes(data_file=None):
    """
    Return every V3 gene that is not a V1 mouse gene.

    The mouse genes are read from ``data_file``; the full gene list always
    comes from the default V1/V2 files.
    """
    _annotations, _position, _genes, v1_frame, _Y = read_V1_data(data_file, learn_options=None)
    is_mouse = v1_frame['Organism'] == 'mouse'
    mouse_genes = v1_frame[is_mouse]['Target gene'].unique()
    # TODO this needs to support specifying file names (!= 'None')
    every_gene = get_V3_genes(None, None)
    return np.setdiff1d(every_gene, mouse_genes)
|
|
@@ -0,0 +1,716 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from importlib import resources
|
| 2 |
+
|
| 3 |
+
import azimuth.predict as pd
|
| 4 |
+
import copy
|
| 5 |
+
import os
|
| 6 |
+
import numpy as np
|
| 7 |
+
import azimuth.util
|
| 8 |
+
import shutil
|
| 9 |
+
import pickle
|
| 10 |
+
# import pylab as plt
|
| 11 |
+
import pandas
|
| 12 |
+
# import azimuth.local_multiprocessing
|
| 13 |
+
import azimuth.load_data
|
| 14 |
+
import azimuth.features.featurization as feat
|
| 15 |
+
import traceback
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def check_feature_set_dims(feature_sets):
    """
    Verify that all feature sets describe the same number of individuals.

    Args:
        feature_sets: mapping of feature-set name to an object exposing
            ``.shape`` (only ``shape[0]``, the row count, is inspected).

    Raises:
        AssertionError: if any feature set's row count differs from the
            first one's, or if ``feature_sets`` is empty.
    """
    expected_rows = None
    for name, features in feature_sets.items():
        n_rows = features.shape[0]
        if expected_rows is None:
            # the first feature set defines the reference row count
            expected_rows = n_rows
        if n_rows != expected_rows:
            raise AssertionError("not same # individuals for feature %s" % name)

    if not feature_sets:
        raise AssertionError("features are empty, check learn_options")
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def set_target(learn_options, classification):
    """
    Write the target name, training metric and ground-truth label.

    For classification the 'binary target name' entry is used with metric
    'AUC'; otherwise the 'rank-transformed target name' entry with metric
    'spearmanr'. When ``learn_options["V"] == 3`` both the target and the
    ground-truth label must be one of the two merged-data score columns.

    Returns the same ``learn_options`` object, modified in place.
    """
    preset_to_none = 'target_name' in learn_options and learn_options['target_name'] is None
    assert not preset_to_none, "changed it to be automatically set here"

    if classification:
        source_key, metric = 'binary target name', 'AUC'
    else:
        source_key, metric = 'rank-transformed target name', 'spearmanr'

    chosen_target = learn_options[source_key]
    learn_options["target_name"] = chosen_target
    learn_options["training_metric"] = metric
    learn_options['ground_truth_label'] = chosen_target

    if learn_options["V"] == 3:
        merged_targets = ('score_drug_gene_rank', 'score_drug_gene_threshold')
        assert learn_options['target_name'] in merged_targets, "cannot use raw scores when mergind data"
        assert learn_options["ground_truth_label"] in merged_targets, "cannot use raw scores when mergind data"

    return learn_options
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def GP_setup(learn_options, likelihood='gaussian', degree=3, set_target_fn=set_target):
    """
    Configure learn_options for the "GPy" method.

    Stores the kernel degree, sets 'warpedGP' to True only when
    ``likelihood`` equals 'warped', then applies ``set_target_fn`` as a
    regression target.
    """
    learn_options["method"] = "GPy"
    learn_options['kernel degree'] = degree
    learn_options['warpedGP'] = bool(likelihood == 'warped')

    return set_target_fn(learn_options, classification=False)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def SVC_setup(learn_options, likelihood='gaussian', degree=3, set_target_fn=set_target):
    """
    Configure learn_options for the "SVC" method (classification target).

    ``likelihood`` and ``degree`` are accepted but not used.
    """
    learn_options["method"] = "SVC"
    return set_target_fn(learn_options, classification=True)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def L1_setup(learn_options, set_target_fn=set_target):
    """
    Configure learn_options for L1-penalised linear regression.

    The regression target is applied first; then method, penalty, the
    feature-selection flag and squared loss are written. A grid of 100
    alpha values is added only when 'alpha' is not already present.
    """
    options = set_target_fn(learn_options, classification=False)
    options.update({"method": "linreg", "penalty": "L1", "feature_select": False})
    if "alpha" not in options:
        options["alpha"] = np.array([1e-6 * 1.3 ** step for step in range(100)])
    options["loss"] = "squared"

    return options
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def L2_setup(learn_options, set_target_fn=set_target):
    """
    Configure learn_options for L2-penalised linear regression.

    Same layout as the L1 variant, with "L2" as the penalty: regression
    target first, then method/penalty/feature_select, a default alpha grid
    when none is given, and squared loss.
    """
    options = set_target_fn(learn_options, classification=False)
    options.update({"method": "linreg", "penalty": "L2", "feature_select": False})
    if "alpha" not in options:
        options["alpha"] = np.array([1e-6 * 1.3 ** step for step in range(100)])
    options["loss"] = "squared"

    return options
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def mean_setup(learn_options, set_target_fn=set_target):
    """Apply the regression target and select the 'mean' method."""
    options = set_target_fn(learn_options, classification=False)
    options['method'] = 'mean'
    return options
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def random_setup(learn_options, set_target_fn=set_target):
    """Apply the regression target and select the 'random' method."""
    options = set_target_fn(learn_options, classification=False)
    options['method'] = 'random'
    return options
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def elasticnet_setup(learn_options, set_target_fn=set_target):
    """
    Configure learn_options for elastic-net ("EN") linear regression.

    Applies the regression target, writes method/penalty/feature_select and
    squared loss, and adds a 30-value alpha grid when 'alpha' is absent.
    """
    options = set_target_fn(learn_options, classification=False)
    options.update({
        "method": "linreg",
        "penalty": "EN",
        "feature_select": False,
        "loss": "squared",
    })
    if "alpha" not in options:
        options["alpha"] = np.array([1e-5 * 2 ** step for step in range(30)])
    return options
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def DNN_setup(learn_options, set_target_fn=set_target):
    """
    Apply the regression target and select the 'DNN' method.

    The 'DNN target variable' entry is fixed to 'score'.
    """
    options = set_target_fn(learn_options, classification=False)
    options.update({
        'method': 'DNN',
        'DNN target variable': 'score',  # alternative: 'score_drug_gene_quantized'
    })
    return options
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def RF_setup(learn_options, set_target_fn=set_target):
    """Apply the regression target and select 'RandomForestRegressor'."""
    options = set_target_fn(learn_options, classification=False)
    options['method'] = 'RandomForestRegressor'
    return options
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def doench_setup(learn_options, set_target_fn=set_target):
    """Apply the classification target and select the 'doench' method."""
    options = set_target_fn(learn_options, classification=True)
    options['method'] = 'doench'
    return options
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def sgrna_from_doench_setup(learn_options, set_target_fn=set_target):
    """Apply the regression target and select the 'sgrna_from_doench' method."""
    options = set_target_fn(learn_options, classification=False)
    options['method'] = 'sgrna_from_doench'
    return options
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def linreg_setup(learn_options, set_target_fn=set_target):
    """
    Configure learn_options for plain linear regression.

    Writes method "linreg" with an "L1" penalty whose default alpha is a
    single 0.0 (used only when 'alpha' is absent), sets squared loss, and
    applies the regression target last.
    """
    learn_options.update({"method": "linreg", "penalty": "L1", "feature_select": False})
    if "alpha" not in learn_options:
        learn_options["alpha"] = np.array([0.0])
    learn_options["loss"] = "squared"

    return set_target_fn(learn_options, classification=False)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def logregL1_setup(learn_options, set_target_fn=set_target):
    """
    Configure learn_options for L1-penalised logistic regression.

    Applies the classification target, writes method/penalty/feature_select,
    and fills in a default alpha grid and ``fit_intercept=True`` when those
    keys are absent.
    """
    options = set_target_fn(learn_options, classification=True)
    options.update({"method": "logregL1", "penalty": "L1", "feature_select": False})
    if "alpha" not in options:
        options["alpha"] = np.array([1e-6 * 1.3 ** step for step in range(100)])
    options.setdefault("fit_intercept", True)
    return options
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def LASSOs_ensemble_setup(learn_options, set_target_fn=set_target):
    """
    Configure learn_options for the "lasso_ensemble" method.

    Applies the regression target, writes an "L1" penalty with feature
    selection off, adds a default alpha grid when none is given, and sets
    squared loss.
    """
    options = set_target_fn(learn_options, classification=False)
    options.update({"method": "lasso_ensemble", "penalty": "L1", "feature_select": False})
    if "alpha" not in options:
        options["alpha"] = np.array([1e-6 * 1.3 ** step for step in range(100)])
    options["loss"] = "squared"

    return options
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def xu_et_al_setup(learn_options, set_target_fn=set_target):
    """Apply the classification target and select the "xu_et_al" method."""
    options = set_target_fn(learn_options, classification=True)
    options["method"] = "xu_et_al"

    return options
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def adaboost_setup(learn_options, num_estimators=100, max_depth=3, learning_rate=0.1, set_target_fn=set_target,
                   model="AdaBoost"):
    """
    Configure learn_options for the boosted-tree models.

    Args:
        learn_options: options dict; must already contain 'adaboost_CV'.
        num_estimators: stored as 'adaboost_n_estimators'.
        max_depth: stored as 'adaboost_max_depth' when 'adaboost_CV' is falsy.
        learning_rate: stored as 'adaboost_learning_rate' when 'adaboost_CV'
            is falsy.
        set_target_fn: callable that sets the (regression) target.
        model: "AdaBoost" selects method "AdaBoostRegressor";
            "AdaBoostClassifier" selects method "AdaBoostClassifier".

    Returns:
        The updated learn_options.

    Raises:
        Exception: if ``model`` is neither of the two accepted values.
    """
    learn_options = set_target_fn(learn_options, classification=False)

    if model == "AdaBoost":
        learn_options['method'] = "AdaBoostRegressor"
    elif model == "AdaBoostClassifier":
        learn_options['method'] = "AdaBoostClassifier"
    else:
        raise Exception("model must be either AdaBoost or AdaBoost Classifier")
    learn_options['adaboost_version'] = 'python'  # "R" or "python"

    # The loss only applies to the regressor. The check must look at the
    # resolved method: `model` itself is never "AdaBoostRegressor", so testing
    # it (as before) meant the loss was never defaulted or corrected.
    if learn_options['method'] == "AdaBoostRegressor":
        # 'ls' and 'lad' are the pre-1.0 scikit-learn spellings of these
        # losses; translate them so older option dicts keep working.
        legacy_losses = {'ls': 'squared_error', 'lad': 'absolute_error'}
        requested_loss = learn_options.get('adaboost_loss', 'squared_error')
        learn_options['adaboost_loss'] = legacy_losses.get(requested_loss, requested_loss)

    if 'adaboost_alpha' not in learn_options:
        learn_options['adaboost_alpha'] = 0.5  # this parameter is only used by the huber and quantile loss functions.

    if not learn_options['adaboost_CV']:
        learn_options['adaboost_learning_rate'] = learning_rate
        learn_options['adaboost_n_estimators'] = num_estimators
        learn_options['adaboost_max_depth'] = max_depth
    else:
        # with cross-validation only the number of estimators is fixed here
        learn_options['adaboost_n_estimators'] = num_estimators

    return learn_options
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
def shared_setup(learn_options, order, test):
    """
    Configure multiprocessing and fill in missing learn_options defaults.

    'num_proc' and 'num_thread_per_proc' default to None and are passed to
    ``azimuth.local_multiprocessing.configure``; its return value replaces
    'num_proc'. 'order' is always overwritten; every other option listed
    below is written only when the key is absent.

    Returns:
        The value returned by ``azimuth.local_multiprocessing.configure``.
    """
    learn_options.setdefault('num_proc', None)
    learn_options.setdefault('num_thread_per_proc', None)

    # NOTE(review): the file-level `import azimuth.local_multiprocessing` is
    # commented out -- confirm the submodule is imported elsewhere before use.
    num_proc = azimuth.local_multiprocessing.configure(
        TEST=test,
        num_proc=learn_options["num_proc"],
        num_thread_per_proc=learn_options["num_thread_per_proc"],
    )
    learn_options["num_proc"] = num_proc

    learn_options["order"] = order  # gets used many places in code, not just here

    # (key, default) pairs, applied in this order and only for missing keys.
    # 'num_proc' and 'num_thread_per_proc' are guaranteed present by now, so
    # they need no second default here.
    fallbacks = (
        ("cv", "gene"),  # leave-one-gene-out when no CV preference is given
        ("normalize_features", True),
        ("weighted", None),
        ("all pairs", False),
        ("include_known_pairs", False),
        ("include_gene_guide_feature", 0),  # used as window size, so 0 is none
        # these default to true to match experiments before they were options
        ("gc_features", True),
        ("nuc_features", True),
        ("train_genes", None),
        ("test_genes", None),
        ("seed", 1),
        ("flipV1target", False),
        ("num_genes_remove_train", None),
        ("include_microhomology", False),
        ("algorithm_hyperparam_search", "grid"),  # other option is bo for bayesian optimization
    )
    for option, fallback in fallbacks:
        learn_options.setdefault(option, fallback)

    return num_proc
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
def setup(test=False, order=1, learn_options=None, data_file=None, pam_audit=True, length_audit=True):
    """Load the training data and turn it into feature sets.

    Args:
        test: forwarded to ``shared_setup``; when truthy, ``learn_options["order"]``
            is additionally forced to 1 after the data has been loaded.
        order: forwarded to ``shared_setup``.
        learn_options: dict of learning options. It is modified in place
            (``shared_setup`` fills defaults, ``'all_genes'`` is added here) and
            must contain ``"testing_non_binary_target_name"``.
        data_file: handed to ``azimuth.load_data.from_file``.
        pam_audit: forwarded to ``feat.featurize_data``.
        length_audit: forwarded to ``feat.featurize_data``.

    Returns:
        Tuple ``(Y, feature_sets, target_genes, learn_options, num_proc)``.

    Raises:
        AssertionError: if ``"testing_non_binary_target_name"`` is missing, or the
            first 30mer does not have the length announced in
            ``learn_options['left_right_guide_ind']``.
        Exception: if ``"testing_non_binary_target_name"`` is not one of
            ``'ranks'``, ``'raw'``, ``'thrs'``.
    """
    num_proc = shared_setup(learn_options, order, test)

    assert "testing_non_binary_target_name" in learn_options, \
        "need this in order to get metrics, though used to be not needed, so you may newly see this error"
    if learn_options["testing_non_binary_target_name"] not in ['ranks', 'raw', 'thrs']:
        raise Exception('learn_options["testing_non_binary_target_name"] must be in ["ranks", "raw", "thrs"]')

    Xdf, Y, gene_position, target_genes = azimuth.load_data.from_file(data_file, learn_options)
    learn_options['all_genes'] = target_genes

    if test:
        learn_options["order"] = 1

    if learn_options.get('convert_30mer_to_31mer') is True:
        print(
            "WARNING!!! converting 30 mer to 31 mer (and then cutting off first nucleotide to go back to 30mer with a right shift)")
        # Build the converted column in one pass and assign it as a whole.
        # The previous per-row ``Xdf['30mer'].iloc[i] = ...`` was a chained
        # assignment, which pandas does not guarantee to write back into Xdf.
        index_values = Xdf.index.values
        converted = [
            azimuth.util.convert_to_thirty_one(thirty_mer, index_values[row][1], strand)
            for row, (thirty_mer, strand) in enumerate(zip(Xdf["30mer"].values, Xdf["Strand"].values))
        ]
        Xdf["30mer"] = converted
        Xdf["30mer"] = Xdf["30mer"].apply(lambda x: x[1:])  # chop the first nucleotide

    if learn_options.get('left_right_guide_ind') is not None:
        seq_start, seq_end, expected_length = learn_options['left_right_guide_ind']
        assert len(Xdf["30mer"].values[0]) == expected_length
        Xdf['30mer'] = Xdf['30mer'].apply(lambda seq: seq[seq_start:seq_end])

    feature_sets = feat.featurize_data(Xdf, learn_options, Y, gene_position, pam_audit=pam_audit,
                                       length_audit=length_audit)
    # Seed numpy's global RNG only after featurization, as before.
    np.random.seed(learn_options['seed'])

    return Y, feature_sets, target_genes, learn_options, num_proc
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
def run_models(models, orders, GP_likelihoods=['gaussian', 'warped'], WD_kernel_degrees=[3],
               adaboost_learning_rates=[0.1], adaboost_num_estimators=[100], adaboost_max_depths=[3],
               learn_options_set=None, test=False, CV=True, setup_function=setup, set_target_fn=set_target,
               pam_audit=True, length_audit=True, return_data=False):
    '''
    Run every requested model against every set of learn options.

    CV is set to false if want to train a final model and not cross-validate, but it goes in to what
    looks like cv code.

    Args:
        models: iterable of model names. Names listed in ``feat_models_short`` are
            featurized once per entry of ``orders``; 'mean', 'random', 'DNN' and
            'GP' are run with order 1.
        orders: iterable of feature orders for the featurized models.
        GP_likelihoods, WD_kernel_degrees: grid used for the 'GP' model.
        adaboost_learning_rates, adaboost_num_estimators, adaboost_max_depths:
            grid used for 'AdaBoost' / 'AdaBoostClassifier'.
        learn_options_set: dict mapping a label to a learn_options dict (required).
        test: forwarded to the setup function and to ``pd.cross_validate`` as TEST.
        CV: forwarded to ``pd.cross_validate``; when False exactly one model and
            one learn_options entry are allowed.
        setup_function: callable with the signature of ``setup``.
        set_target_fn: forwarded to the per-model ``*_setup`` helpers.
        pam_audit, length_audit: forwarded to the setup function.
        return_data: accepted for interface compatibility; not used here.

    Returns:
        ``(results, all_learn_options)``, both dicts keyed by the model string.

    Raises:
        AssertionError: missing ``learn_options_set``, violated CV=False limits, or
            a non-default ``setup_function`` with a non-featurized model.
        NotImplementedError: unknown model name.

    NOTE: the list defaults are only read, never mutated.
    '''

    results = {}
    assert learn_options_set is not None, "need to specify learn_options_set"
    all_learn_options = {}

    # shorten so easier to display on graphs
    feat_models_short = {'L1': "L1", 'L2': "L2", 'elasticnet': "EN", 'linreg': "LR",
                         'RandomForest': "RF",
                         'AdaBoost': "AB", 'AdaBoostClassifier': "ABClass", 'doench': 'doench',
                         "logregL1": "logregL1", "sgrna_from_doench": "sgrna_from_doench", 'SVC': 'SVC',
                         'xu_et_al': 'xu_et_al'}

    if not CV:
        print("Received option CV=False, so I'm training using all of the data")
        assert len(learn_options_set) == 1, "when CV is False, only 1 set of learn options is allowed"
        assert len(models) == 1, "when CV is False, only 1 model is allowed"

    for learn_options_str in list(learn_options_set.keys()):
        # these options get augmented in setup
        partial_learn_opt = learn_options_set[learn_options_str]
        for model in models:
            # models requiring explicit featurization
            if model in feat_models_short:
                for order in orders:
                    print("running %s, order %d for %s" % (model, order, learn_options_str))

                    # TODO precompute features for all orders, as this is repeated for each model
                    Y, feature_sets, target_genes, learn_options, num_proc = setup_function(
                        test=test, order=order, learn_options=partial_learn_opt,
                        pam_audit=pam_audit, length_audit=length_audit)

                    if model == 'L1':
                        learn_options_model = L1_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'L2':
                        learn_options_model = L2_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'elasticnet':
                        learn_options_model = elasticnet_setup(copy.deepcopy(learn_options),
                                                               set_target_fn=set_target_fn)
                    elif model == 'linreg':
                        learn_options_model = linreg_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == "logregL1":
                        learn_options_model = logregL1_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'RandomForest':
                        learn_options_model = RF_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'SVC':
                        learn_options_model = SVC_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'doench':
                        learn_options_model = doench_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'sgrna_from_doench':
                        learn_options_model = sgrna_from_doench_setup(copy.deepcopy(learn_options),
                                                                      set_target_fn=set_target_fn)
                    elif model == 'xu_et_al':
                        learn_options_model = xu_et_al_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model in ('AdaBoost', 'AdaBoostClassifier'):
                        # Was ``model == 'AdaBoost' or 'AdaBoostClassifier'``, which is
                        # always truthy and only worked because it was the last branch.
                        # Only the LAST grid combination survives the loops; that is
                        # the pre-existing behaviour and is kept as is.
                        for learning_rate in adaboost_learning_rates:
                            for num_estimators in adaboost_num_estimators:
                                for max_depth in adaboost_max_depths:
                                    learn_options_model = adaboost_setup(copy.deepcopy(learn_options),
                                                                         learning_rate=learning_rate,
                                                                         num_estimators=num_estimators,
                                                                         max_depth=max_depth,
                                                                         set_target_fn=set_target_fn, model=model)
                    else:
                        raise NotImplementedError("model %s not supported" % model)

                    if model == 'AdaBoost':
                        model_string = feat_models_short[model] + '_or%d_md%d_lr%.2f_n%d_%s' % (
                            learn_options_set[learn_options_str]["order"], max_depth, learning_rate, num_estimators,
                            learn_options_str)
                    else:
                        model_string = feat_models_short[model] + '_ord%d_%s' % (
                            learn_options_set[learn_options_str]["order"], learn_options_str)

                    results[model_string] = pd.cross_validate(Y, feature_sets, learn_options=learn_options_model,
                                                              TEST=test, CV=CV)

                    all_learn_options[model_string] = learn_options_model
            # if the model doesn't require explicit featurization
            else:
                # Was ``setup_fn``, an undefined name that raised NameError here.
                assert setup_function == setup, "not yet modified to handle this"
                print("running %s for %s" % (model, learn_options_str))
                Y, feature_sets, target_genes, learn_options, num_proc = setup(test=test, order=1,
                                                                               learn_options=partial_learn_opt,
                                                                               pam_audit=pam_audit,
                                                                               length_audit=length_audit)
                if model == 'mean':
                    learn_options_model = mean_setup(copy.deepcopy(learn_options))
                elif model == 'random':
                    learn_options_model = random_setup(copy.deepcopy(learn_options))
                elif model == 'DNN':
                    learn_options_model = DNN_setup(copy.deepcopy(learn_options))
                elif model == 'GP':
                    for likelihood in GP_likelihoods:
                        for degree in WD_kernel_degrees:
                            learn_options_model = GP_setup(copy.deepcopy(learn_options), likelihood=likelihood,
                                                           degree=degree)
                            model_string = '%s_%s_degree%d_%s' % (model, likelihood, degree, learn_options_str)
                            results[model_string] = pd.cross_validate(Y, feature_sets,
                                                                      learn_options=learn_options_model, TEST=test,
                                                                      CV=CV)
                else:
                    raise NotImplementedError("model %s not supported" % model)

                # "GP" already calls pd.cross_validate() and has its own model_string, so skip this.
                if model != "GP":
                    model_string = model + '_%s' % learn_options_str
                    results[model_string] = pd.cross_validate(Y, feature_sets, learn_options=learn_options_model,
                                                              TEST=test, CV=CV)

            all_learn_options[model_string] = learn_options_model

    return results, all_learn_options
|
| 439 |
+
|
| 440 |
+
|
| 441 |
+
def pickle_runner_results(exp_name, results, all_learn_options, relpath="/../" + "results"):
    """Pickle ``(results, all_learn_options)`` next to this module.

    The target directory is this file's directory with ``relpath`` appended
    (plain string concatenation) and is created when missing. When
    ``exp_name`` is None the first key of ``results`` names the file.
    """
    out_dir = os.path.dirname(os.path.abspath(__file__)) + relpath
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
        print("Created directory: %s" % str(out_dir))

    if exp_name is None:
        exp_name = list(results.keys())[0]

    out_path = out_dir + '/' + exp_name + '.pickle'
    with open(out_path, 'wb') as handle:
        print("writing results to %s" % out_path)
        # -1 selects the highest pickle protocol available.
        pickle.dump((results, all_learn_options), handle, -1)
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
def runner(models, learn_options, GP_likelihoods=None, orders=None, WD_kernel_degrees=None, where='local',
           cluster_user='fusi', cluster='RR1-N13-09-H44', test=False, exp_name=None, **kwargs):
    """Run the models locally, or prepare a cluster job for them.

    ``where == 'cluster'`` creates the job through ``cluster_job.create`` and
    returns ``(tempdir, clust_filename, user)``.
    ``where == 'local'`` runs ``run_models``, computes and plots the metrics,
    pickles the results and returns
    ``(results, all_learn_options, all_metrics, gene_names)``.
    Any other value of ``where`` falls through and returns None.
    The ``cluster`` argument is accepted but not used in this function.
    """
    if where == 'cluster':
        from . import cluster_job

        # create random cluster directory, dump learn options, and create cluster file
        job = cluster_job.create(cluster_user, models, orders, WD_kernel_degrees,
                                 GP_likelihoods, exp_name=exp_name,
                                 learn_options=learn_options, **kwargs)
        tempdir, user, clust_filename = job
        return tempdir, clust_filename, user

    if where != 'local':
        return None

    results, all_learn_options = run_models(models, orders=orders, GP_likelihoods=GP_likelihoods,
                                            learn_options_set=learn_options,
                                            WD_kernel_degrees=WD_kernel_degrees,
                                            test=test, **kwargs)
    metrics_and_genes = azimuth.util.get_all_metrics(results, learn_options)
    all_metrics, gene_names = metrics_and_genes
    azimuth.util.plot_all_metrics(all_metrics, gene_names, all_learn_options, save=True)

    # for non-local (i.e. cluster), the comparable code is in cli_run_model.py
    pickle_runner_results(exp_name, results, all_learn_options)

    return results, all_learn_options, all_metrics, gene_names
|
| 486 |
+
|
| 487 |
+
|
| 488 |
+
def save_final_model_V3(filename=None, include_position=True, learn_options=None, short_name='final', pam_audit=True,
                        length_audit=True):
    '''
    Train the final V3 AdaBoost model on all data (CV=False) and pickle it.

    Args:
        filename: path the ``(model, learn_options)`` tuple is pickled to (required).
        include_position: only used when ``learn_options`` is None; sets the
            ``"include_gene_position"`` entry of the default options.
        learn_options: optional learn_options dict replacing the defaults.
        short_name: label for the single entry of the learn_options set.
        pam_audit: forwarded to ``run_models``.
        length_audit: forwarded to ``run_models``.

    Returns:
        The trained model, taken from ``results[...][3][0]``.

    Raises:
        AssertionError: if ``filename`` is None.

    A failure while writing the pickle is printed with its traceback and does
    NOT propagate; the model is still returned.
    '''
    test = False
    assert filename is not None, "need to provide filename to save final model"

    if learn_options is None:
        # The with-position and without-position defaults differ only in
        # "include_gene_position", so one dict covers both.
        learn_options = {"V": 3,
                         'train_genes': azimuth.load_data.get_V3_genes(),
                         'test_genes': azimuth.load_data.get_V3_genes(),
                         "testing_non_binary_target_name": 'ranks',
                         'include_pi_nuc_feat': True,
                         "gc_features": True,
                         "nuc_features": True,
                         "include_gene_position": bool(include_position),
                         "include_NGGX_interaction": True,
                         "include_Tm": True,
                         "include_strand": False,
                         "include_gene_feature": False,
                         "include_gene_guide_feature": 0,
                         "extra pairs": False,
                         "weighted": None,
                         "training_metric": 'spearmanr',
                         "NDGC_k": 10,
                         "cv": "gene",
                         "include_gene_effect": False,
                         "include_drug": False,
                         "include_sgRNAscore": False,
                         # NOTE(review): presumably handed to scikit-learn as the boosting
                         # loss; the earlier comment listed the older spellings
                         # ("ls", "lad", "huber", "quantile"). Confirm in adaboost_setup.
                         'adaboost_loss': 'squared_error',
                         # this parameter is only used by the huber and quantile loss functions.
                         'adaboost_alpha': 0.5,
                         'normalize_features': False,
                         'adaboost_CV': False
                         }

    learn_options_set = {short_name: learn_options}
    # Was ``pam_audit=length_audit``, which silently dropped the caller's pam_audit.
    results, all_learn_options = run_models(["AdaBoost"], orders=[2], adaboost_learning_rates=[0.1],
                                            adaboost_max_depths=[3], adaboost_num_estimators=[100],
                                            learn_options_set=learn_options_set,
                                            test=test, CV=False, pam_audit=pam_audit, length_audit=length_audit)

    print(f"Results: {results}")
    print(f"All learn options: {all_learn_options}")

    # Only one model / learn_options pair is run, so take the first result.
    model = list(results.values())[0][3][0]

    try:
        with open(filename, 'wb') as f:
            pickle.dump((model, learn_options), f, protocol=4)
        print(f"Model saved successfully to {filename}")
    except Exception as e:
        # Saving is best effort: report the failure and still return the model.
        print(f"Error saving model to {filename}: {str(e)}")
        traceback.print_exc()

    return model
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
def predict(seq, aa_cut=None, percent_peptide=None, model=None, model_file=None, pam_audit=True, length_audit=False,
            learn_options_override=None):
    """
    Predict scores for an array of 30 nt sequences.

    Args:
        seq: numpy array of 30 nt sequences.
        aa_cut: numpy array of amino acid cut positions (optional).
        percent_peptide: numpy array of percent peptide (optional).
        model: ``(model, learn_options)`` tuple to use for prediction (optional).
            When given it takes precedence and no file is read.
        model_file: file name of pickled ``(model, learn_options)`` tuple to use
            when ``model`` is None (optional). When both are None a default file
            from the ``saved_models`` directory next to this module is used:
            ``V3_model_nopos.pickle`` if no position information is supplied (or
            any percent_peptide is -1), otherwise ``V3_model_full.pickle``.
        pam_audit: check PAM of each sequence.
        length_audit: check length of each sequence.
        learn_options_override: a dictionary indicating which learn_options to override (optional).

    Returns: a numpy array of predictions.

    Raises:
        AssertionError: on malformed inputs, or if the model returned only 0s and 1s.
        FileNotFoundError: if the model file to load does not exist.
    """
    assert isinstance(seq, (np.ndarray)), "Please ensure seq is a numpy array"
    # Check the array itself first so an empty array fails the assert, not seq[0].
    assert len(seq) > 0 and len(seq[0]) > 0, "Make sure that seq is not empty"
    assert isinstance(seq[0],
                      str), "Please ensure input sequences are in string format, i.e. 'AGAG' rather than ['A' 'G' 'A' 'G'] or alternate representations"

    if aa_cut is not None:
        assert len(aa_cut) > 0, "Make sure that aa_cut is not empty"
        assert isinstance(aa_cut, (np.ndarray)), "Please ensure aa_cut is a numpy array"
        assert np.all(np.isreal(aa_cut)), "amino-acid cut position needs to be a real number"

    if percent_peptide is not None:
        assert len(percent_peptide) > 0, "Make sure that percent_peptide is not empty"
        assert isinstance(percent_peptide, (np.ndarray)), "Please ensure percent_peptide is a numpy array"
        assert np.all(np.isreal(percent_peptide)), "percent_peptide needs to be a real number"

    if model is None:
        using_default_file = model_file is None
        if using_default_file:
            if np.any(percent_peptide == -1) or (percent_peptide is None and aa_cut is None):
                print("No model file specified, using V3_model_nopos")
                model_name = 'V3_model_nopos.pickle'
            else:
                print("No model file specified, using V3_model_full")
                model_name = 'V3_model_full.pickle'

            model_file = os.path.join(os.path.dirname(__file__), 'saved_models', model_name)
            print(f"Looking for model file: {model_file}")

        if not os.path.exists(model_file):
            print(f"Model file not found: {model_file}")
            if using_default_file:
                print("Please run 'python azimuth/model_comparison.py' to generate the model files.")
                print("After generating the models, move them to a 'saved_models' directory in your project root.")
            raise FileNotFoundError(f"Model file not found: {model_file}")

        # SECURITY: unpickling executes arbitrary code; only load trusted model files.
        # The file is read once (it used to be unpickled twice).
        with open(model_file, 'rb') as f:
            model, learn_options = pickle.load(f, encoding='bytes')
    else:
        # A caller-supplied model used to be overwritten by the pickle read from
        # disk; honour it as the docstring promises.
        model, learn_options = model

    # Work on a shallow copy so the caller's learn_options dict is not mutated.
    learn_options = dict(learn_options)
    learn_options["V"] = 2

    learn_options = override_learn_options(learn_options_override, learn_options)

    Xdf = pandas.DataFrame(columns=['30mer', 'Strand'], data=list(zip(seq, ['NA'] * len(seq))))

    has_position = (percent_peptide is not None and aa_cut is not None
                    and np.all(percent_peptide != -1))
    if has_position:
        gene_position = pandas.DataFrame(columns=['Percent Peptide', 'Amino Acid Cut position'],
                                         data=list(zip(percent_peptide, aa_cut)))
    else:
        # -1 in both columns marks "no position information".
        missing = np.ones(seq.shape[0]) * -1
        gene_position = pandas.DataFrame(columns=['Percent Peptide', 'Amino Acid Cut position'],
                                         data=list(zip(missing, missing)))

    feature_sets = feat.featurize_data(Xdf, learn_options, pandas.DataFrame(), gene_position, pam_audit=pam_audit,
                                       length_audit=length_audit)
    inputs, dim, dimsum, feature_names = azimuth.util.concatenate_feature_sets(feature_sets)

    # call to scikit-learn, returns a vector of predicted values
    preds = model.predict(inputs)

    # also check that predictions are not 0/1 from a classifier.predict() (instead of predict_proba() or decision_function())
    unique_preds = np.unique(preds)
    assert not np.all(np.isin(unique_preds, [0, 1])), "model returned only 0s and 1s"
    return preds
|
| 671 |
+
|
| 672 |
+
|
| 673 |
+
def override_learn_options(learn_options_override, learn_options):
    """
    Copy every entry of learn_options_override into learn_options (in place)
    and return learn_options; a None override leaves it untouched.
    """
    if learn_options_override is None:
        return learn_options
    learn_options.update(learn_options_override)
    return learn_options
|
| 681 |
+
|
| 682 |
+
|
| 683 |
+
def fill_learn_options(learn_options_used_to_fill, learn_options_with_possible_missing):
    """
    Add to learn_options_with_possible_missing (in place) only those keys of
    learn_options_used_to_fill it does not already have, and return it.
    """
    if learn_options_used_to_fill is None:
        return learn_options_with_possible_missing
    for key, value in list(learn_options_used_to_fill.items()):
        # setdefault never overwrites an existing entry
        learn_options_with_possible_missing.setdefault(key, value)
    return learn_options_with_possible_missing
|
| 692 |
+
|
| 693 |
+
|
| 694 |
+
def write_results(predictions, file_to_predict):
    """Write the predictions next to the input CSV.

    Reads ``file_to_predict`` with pandas, adds a ``'predictions'`` column and
    writes the result to a sibling file: ``name.csv`` becomes ``name.pred.csv``.

    Only a trailing ``.csv`` is rewritten. The previous ``str.replace`` touched
    every ".csv" in the path, directory names included, and overwrote the input
    file when the path had no ".csv" at all. Such a path now gets ``.pred.csv``
    appended.

    Returns:
        ``(data, newfile)``: the augmented DataFrame and the output path.
    """
    suffix = ".csv"
    if file_to_predict.endswith(suffix):
        newfile = file_to_predict[:-len(suffix)] + ".pred.csv"
    else:
        newfile = file_to_predict + ".pred.csv"

    data = pandas.read_csv(file_to_predict)
    data['predictions'] = predictions
    data.to_csv(newfile)
    print("wrote results to %s" % newfile)
    return data, newfile
|
| 701 |
+
|
| 702 |
+
|
| 703 |
+
if __name__ == '__main__':
    # Script entry point: train both final V3 models and pickle them into the
    # user's home directory. Any failure is reported with its traceback.
    try:
        # Model without position information
        nopos_path = os.path.expanduser('~/V3_model_nopos.pickle')
        nopos_model = save_final_model_V3(filename=nopos_path, include_position=False)
        print("Model without position information saved successfully.")

        # Model with position information
        full_path = os.path.expanduser('~/V3_model_full.pickle')
        full_model = save_final_model_V3(filename=full_path, include_position=True)
        print("Model with position information saved successfully.")

        print("Both models saved successfully.")
    except Exception as e:
        print(f"An error occurred while saving models: {str(e)}")
        traceback.print_exc()
|
|
@@ -0,0 +1,365 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import sklearn
|
| 3 |
+
from sklearn.metrics import roc_curve, auc
|
| 4 |
+
import sklearn.metrics
|
| 5 |
+
import sklearn.model_selection
|
| 6 |
+
import copy
|
| 7 |
+
from . import util
|
| 8 |
+
import time
|
| 9 |
+
# from . import metrics as ranking_metrics
|
| 10 |
+
# import azimuth.models.regression
|
| 11 |
+
# import azimuth.models.ensembles
|
| 12 |
+
# import azimuth.models.DNN
|
| 13 |
+
# import azimuth.models.baselines
|
| 14 |
+
import multiprocessing
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def fill_in_truth_and_predictions(truth, predictions, fold, y_all, y_pred, learn_options, test):
    """Append one fold's ground truth and predictions to the running arrays.

    ``truth[fold]['ranks']`` and ``truth[fold]['thrs']`` (and ``'raw'`` when
    enabled) are extended with the ``test`` rows of the matching ``y_all``
    column; ``predictions[fold]`` is extended with the flattened ``y_pred``.
    Both dicts are updated in place and returned.
    """
    fold_truth = truth[fold]

    def _test_rows(column_key):
        # test rows of the y_all column named by learn_options[column_key]
        return y_all[learn_options[column_key]].values[test].flatten()

    fold_truth['ranks'] = np.hstack((fold_truth['ranks'], _test_rows('rank-transformed target name')))
    fold_truth['thrs'] = np.hstack((fold_truth['thrs'], _test_rows('binary target name')))

    # NOTE(review): the gate tests 'raw_target_name' (underscores) while the
    # column is read from 'raw target name' (spaces) -- confirm which key is intended.
    if 'raw_target_name' in learn_options:
        fold_truth['raw'] = np.hstack((fold_truth['raw'], _test_rows('raw target name')))

    predictions[fold] = np.hstack((predictions[fold], y_pred.flatten()))

    return truth, predictions
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def construct_filename(learn_options, TEST):
    """Build (and print) a descriptive file-name stem from the learn options.

    The pieces are: a version prefix ("V<version>" or "offV1"), the method,
    ".order<N>", the target name, the penalty for "linreg", the cv scheme and a
    suffix for the training metric.

    NOTE(review): with TEST set the prefix is *replaced* by "TEST." rather than
    extended -- kept as is, confirm this is intended.
    """
    prefix = "V%s" % learn_options["V"] if "V" in learn_options else "offV1"
    if TEST:
        prefix = "TEST."

    method = learn_options["method"]
    pieces = [prefix, method, '.order%d' % learn_options["order"], learn_options["target_name"]]

    # "GPy" adds nothing of its own; "linreg" adds its penalty.
    if method != "GPy" and method == "linreg":
        pieces.append("." + learn_options["penalty"])
    pieces.append("." + learn_options["cv"])

    metric = learn_options["training_metric"]
    if metric == "NDCG":
        pieces.append(".NDGC_%d" % learn_options["NDGC_k"])
    elif metric == "AUC":
        pieces.append(".AUC")
    elif metric == 'spearmanr':
        pieces.append(".spearman")

    filename = "".join(pieces)
    print("filename = %s" % filename)
    return filename
|
| 70 |
+
|
| 71 |
+
def print_summary(global_metric, results, learn_options, feature_sets, flags):
    """Print a human-readable summary of a cross-validation run.

    Args:
        global_metric: overall metric value, printed with two decimals.
        results: indexable; ``results[0]`` holds the per-fold metric values
            (their median is printed) and ``results[4]`` the feature count.
        learn_options: dict with at least 'metric', 'order', 'target_name' and
            every key of ``flags``; 'kerntype' and 'degree' are printed if present.
        feature_sets: dict whose keys (feature-set names) are listed.
        flags: dict whose keys name the learn_options entries to print.
    """
    print("\nSummary:")
    print(learn_options)
    print("\t\tglobal %s=%.2f" % (learn_options['metric'], global_metric))
    print("\t\tmedian %s across folds=%.2f" % (learn_options['metric'], np.median(results[0])))
    print("\t\torder=%d" % learn_options["order"])
    if 'kerntype' in learn_options:
        # This line used to be a bare string expression and printed nothing.
        print("\t\tkern type = %s" % learn_options['kerntype'])
    if 'degree' in learn_options:
        print("\t\tdegree=%d" % learn_options['degree'])
    print("\t\ttarget_name=%s" % learn_options["target_name"])

    for flag_name in flags:
        print('\t\t' + flag_name + '=' + str(learn_options[flag_name]))

    print("\t\tfeature set:")
    # renamed from ``set`` so the builtin is no longer shadowed
    for set_name in feature_sets:
        print("\t\t\t%s" % set_name)
    print("\t\ttotal # features=%d" % results[4])
|
| 88 |
+
|
| 89 |
+
def extract_fpr_tpr_for_fold(aucs, fold, i, predictions, truth, y_binary, test, y_pred):
    """Compute the ROC AUC of one fold and append it to ``aucs``.

    The ROC curve is built from ``y_binary[test]`` against ``y_pred``.
    ``fold``, ``i``, ``predictions`` and ``truth`` are accepted but not used.
    """
    n_classes = len(np.unique(y_binary))
    assert n_classes <= 2, "if using AUC need binary targets"
    false_pos_rate, true_pos_rate, _ = roc_curve(y_binary[test], y_pred)
    aucs.append(auc(false_pos_rate, true_pos_rate))
|
| 94 |
+
|
| 95 |
+
def extract_NDCG_for_fold(metrics, fold, i, predictions, truth, y_ground_truth, test, y_pred, learn_options):
    """Compute NDCG at k (``learn_options["NDGC_k"]``) for one fold and append it to ``metrics``.

    ``fold``, ``i``, ``predictions`` and ``truth`` are accepted but not used.
    """
    # NOTE(review): the ``ranking_metrics`` import is commented out in this
    # module's visible imports -- confirm it is in scope before relying on this.
    fold_truth = y_ground_truth[test].flatten()
    fold_pred = y_pred.flatten()
    metrics.append(ranking_metrics.ndcg_at_k_ties(fold_truth, fold_pred, learn_options["NDGC_k"]))
|
| 98 |
+
|
| 99 |
+
def extract_spearman_for_fold(metrics, fold, i, predictions, truth, y_ground_truth, test, y_pred, learn_options):
    """Compute the Spearman correlation of one fold and append it to ``metrics``.

    Uses the first element returned by ``util.spearmanr_nonan`` and asserts it
    is not NaN. ``fold``, ``i``, ``predictions``, ``truth`` and
    ``learn_options`` are accepted but not used.
    """
    fold_truth = y_ground_truth[test].flatten()
    fold_pred = y_pred.flatten()
    correlation = util.spearmanr_nonan(fold_truth, fold_pred)[0]
    assert not np.isnan(correlation), "found nan spearman"
    metrics.append(correlation)
|
| 103 |
+
|
| 104 |
+
def get_train_test(test_gene, y_all, train_genes=None):
    """Split the rows of ``y_all`` into train and test index arrays for one gene.

    Rows are assigned by the 'Target gene' level of ``y_all``'s index. Train
    rows are those not belonging to ``test_gene``, further restricted to
    ``train_genes`` when given (train_genes + test genes need not cover all
    genes, e.g. when V3 is loaded but only V2 is used). Test rows are those of
    ``test_gene``; for the special gene 'dummy' the train rows double as the
    test rows.

    Returns:
        ``(train, test)`` as arrays of integer row positions.
    """
    gene_labels = y_all.index.get_level_values('Target gene').values
    train_mask = gene_labels != test_gene

    if train_genes is not None:
        allowed = np.zeros(train_mask.shape, dtype=bool)
        for gene in train_genes:
            allowed = np.logical_or(allowed, gene_labels == gene)
        train_mask = np.logical_and(train_mask, allowed)

    if test_gene == 'dummy':
        test_mask = train_mask
    else:
        test_mask = gene_labels == test_gene

    # convert the boolean masks to positional indices
    test_idx = np.flatnonzero(test_mask)
    train_idx = np.flatnonzero(train_mask)
    return train_idx, test_idx
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
# Maps learn_options["method"] to (azimuth.models submodule name, fold function
# name, extra positional arguments appended after learn_options). Names are
# stored as strings so that only the requested method's module is touched.
_FOLD_FUNCTIONS = {
    "GPy": ("GP", "gp_on_fold", ()),
    "GPy_fs": ("GP", "gp_with_fs_on_fold", ()),
    "linreg": ("regression", "linreg_on_fold", ()),
    "logregL1": ("regression", "logreg_on_fold", ()),
    "ARDRegression": ("regression", "ARDRegression_on_fold", ()),
    "AdaBoostRegressor": ("ensembles", "adaboost_on_fold", (False,)),
    "AdaBoostClassifier": ("ensembles", "adaboost_on_fold", (True,)),
    "DecisionTreeRegressor": ("ensembles", "decisiontree_on_fold", ()),
    "RandomForestRegressor": ("ensembles", "randomforest_on_fold", ()),
    "lasso_ensemble": ("ensembles", "LASSOs_ensemble_on_fold", ()),
    "random": ("baselines", "random_on_fold", ()),
    "mean": ("baselines", "mean_on_fold", ()),
    "SVC": ("baselines", "SVC_on_fold", ()),
    "doench": ("baselines", "doench_on_fold", ()),
    "sgrna_from_doench": ("baselines", "sgrna_from_doench_on_fold", ()),
    "xu_et_al": ("baselines", "xu_et_al_on_fold", ()),
    "DNN": ("DNN", "DNN_on_fold", ()),
}


def _resolve_fold_function(method):
    '''
    Return (fold_function, extra_args) for the given learn_options["method"].

    extra_args is a tuple of positional arguments to append after learn_options
    (only the AdaBoost variants use it, for their classification flag).
    Raises Exception when the method has no registered fold function.
    '''
    if method not in _FOLD_FUNCTIONS:
        raise Exception("invalid method found: %s" % method)
    submodule, function_name, extra_args = _FOLD_FUNCTIONS[method]
    return getattr(getattr(azimuth.models, submodule), function_name), extra_args


def _score_fold(learn_options, metrics, fold_label, i, predictions, truth, y_all, test, y_pred):
    '''
    Run the metric extractor selected by learn_options["training_metric"] for one
    fold, then return the (truth, predictions) pair produced by
    fill_in_truth_and_predictions.

    Raises Exception when the training metric is not AUC, NDCG or spearmanr.
    '''
    training_metric = learn_options["training_metric"]
    if training_metric == "AUC":
        extract_fpr_tpr_for_fold(metrics, fold_label, i, predictions, truth, y_all[learn_options["ground_truth_label"]].values, test, y_pred)
    elif training_metric == "NDCG":
        extract_NDCG_for_fold(metrics, fold_label, i, predictions, truth, y_all[learn_options["ground_truth_label"]].values, test, y_pred, learn_options)
    elif training_metric == 'spearmanr':
        extract_spearman_for_fold(metrics, fold_label, i, predictions, truth, y_all[learn_options["ground_truth_label"]].values, test, y_pred, learn_options)
    else:
        raise Exception("invalid 'training_metric' in learn_options: %s" % training_metric)

    return fill_in_truth_and_predictions(truth, predictions, fold_label, y_all, y_pred, learn_options, test)


def cross_validate(y_all, feature_sets, learn_options=None, TEST=False, train_genes=None, CV=True):
    '''
    Train and evaluate the model chosen by learn_options["method"] on each
    cross-validation fold.

    y_all is indexed by column name (learn_options["target_name"],
    learn_options["ground_truth_label"] and 'Target gene' are read from it).
    feature_sets is a dictionary of "set name" to pandas.DataFrame
    one set might be single-nucleotide, position-independent features of order X, for e.g.
    learn_options is required despite its None default; keys read here include
    "method", "penalty", "weighted", "cv" ("stratified" or "gene"),
    "train_genes", "test_genes", "all_genes", "num_genes_remove_train",
    "num_proc", "training_metric" ("AUC", "NDCG" or "spearmanr") and optionally
    "n_folds".
    TEST=True restricts the run to the first fold only.
    train_genes is accepted for interface compatibility but not used; the
    training genes are taken from learn_options["train_genes"].
    When CV=False, it trains on everything (and tests on everything, just to fit the code);
    this requires learn_options["cv"] == "gene".

    With learn_options["num_proc"] > 1 the folds are dispatched to a
    multiprocessing pool, otherwise they are run one after another.

    Returns the tuple
    (metrics, gene_pred, fold_labels, m, dimsum, filename, feature_names)
    where gene_pred is [(truth, predictions)] and m maps the fold index to the
    second value returned by the fold function.

    Raises TypeError when learn_options is None, NotImplementedError for
    stratified CV combined with num_genes_remove_train, and Exception for an
    unknown cv option, method or training metric.
    '''
    if learn_options is None:
        raise TypeError("cross_validate requires a learn_options dictionary")

    target_values = y_all[learn_options['target_name']].values
    print("range of y_all is [%f, %f]" % (np.min(target_values), np.max(target_values)))

    method = learn_options["method"]
    assert method in _FOLD_FUNCTIONS, "invalid method: %s" % method
    assert (method == "linreg" and learn_options['penalty'] == 'L2') or learn_options["weighted"] is None, "weighted only works with linreg L2 right now"

    # construct filename from options
    filename = construct_filename(learn_options, TEST)

    print("Cross-validating genes...")
    t2 = time.time()

    y = np.array(target_values[:, None], dtype=np.float64)

    # concatenate feature sets in to one nparray, and get dimension of each
    inputs, dim, dimsum, feature_names = util.concatenate_feature_sets(feature_sets)

    if not CV:
        assert learn_options['cv'] == 'gene', 'Must use gene-CV when CV is False (I need to use all of the genes and stratified complicates that)'

    # set-up for cross-validation
    ## for outer loop, the one Doench et al use genes for
    if learn_options["cv"] == "stratified":
        # The option has been spelled both with a space and with an underscore,
        # so honour either spelling.
        uses_extra_pairs = learn_options.get("extra pairs") or learn_options.get("extra_pairs")
        assert not uses_extra_pairs, "can't use extra pairs with stratified CV, need to figure out how to properly account for genes affected by two drugs"
        label_encoder = sklearn.preprocessing.LabelEncoder()
        label_encoder.fit(y_all['Target gene'].values)
        gene_classes = label_encoder.transform(y_all['Target gene'].values)
        if 'n_folds' in learn_options:
            n_folds = learn_options['n_folds']
        elif learn_options['train_genes'] is not None and learn_options["test_genes"] is not None:
            n_folds = len(learn_options["test_genes"])
        else:
            n_folds = len(learn_options['all_genes'])

        # sklearn.cross_validation was removed from scikit-learn; prefer
        # model_selection and only fall back to the legacy module when needed.
        try:
            from sklearn.model_selection import StratifiedKFold
        except ImportError:
            cv = sklearn.cross_validation.StratifiedKFold(gene_classes, n_folds=n_folds, shuffle=True)
        else:
            # split() only needs X for its length; the labels drive the stratification
            cv = StratifiedKFold(n_splits=n_folds, shuffle=True).split(np.zeros(len(gene_classes)), gene_classes)
        fold_labels = ["fold%d" % i for i in range(1, n_folds + 1)]
        if learn_options['num_genes_remove_train'] is not None:
            raise NotImplementedError()
    elif learn_options["cv"] == "gene":
        cv = []

        if not CV:
            # get train, test split using a dummy gene
            cv.append(get_train_test('dummy', y_all))
            fold_labels = ["dummy_for_no_cv"]

        elif learn_options['train_genes'] is not None and learn_options["test_genes"] is not None:
            for gene in learn_options['test_genes']:
                cv.append(get_train_test(gene, y_all, learn_options['train_genes']))
            fold_labels = learn_options["test_genes"]

        else:
            for gene in learn_options['all_genes']:
                cv.append(get_train_test(gene, y_all))
            fold_labels = learn_options['all_genes']

        num_genes_remove = learn_options['num_genes_remove_train']
        if num_genes_remove is not None:
            # fold indices are positional, so look genes up positionally as well
            target_genes = y_all['Target gene'].values
            for i, (train, test) in enumerate(cv):
                unique_genes = np.random.permutation(np.unique(target_genes[train]))
                genes_to_keep = set(unique_genes[0:len(unique_genes) - num_genes_remove])
                train_positions = set(train)
                filtered_train = [j for j, gene in enumerate(target_genes)
                                  if j in train_positions and gene in genes_to_keep]
                cv[i] = (filtered_train, test)
                if num_genes_remove == 0:
                    assert np.all(train == cv[i][0])
                    assert np.all(test == cv[i][1])
                print("# train/train after/before is %s, %s" % (len(cv[i][0]), len(train)))
                print("# test/test after/before is %s, %s" % (len(cv[i][1]), len(test)))
    else:
        raise Exception("invalid cv options given: %s" % learn_options["cv"])

    cv = list(cv)  # make list from generator, so can subset for TEST case
    if TEST:
        ind_to_use = [0]
        cv = [cv[i] for i in ind_to_use]
        fold_labels = [fold_labels[i] for i in ind_to_use]

    truth = {label: {kind: np.array([]) for kind in ['raw', 'ranks', 'thrs']} for label in fold_labels}
    predictions = {label: np.array([]) for label in fold_labels}

    m = {}
    metrics = []

    # the method does not change between folds, so resolve it once
    fold_function, extra_args = _resolve_fold_function(method)

    # do the cross-validation
    num_proc = learn_options["num_proc"]
    if num_proc > 1:
        num_proc = min(num_proc, len(cv))
        print("using multiprocessing with %d procs--one for each fold" % num_proc)
        # the context manager terminates the pool even if submitting a job fails
        with multiprocessing.Pool(processes=num_proc) as pool:
            jobs = []
            for i, (train, test) in enumerate(cv):
                print("working on fold %d of %d, with %d train and %d test" % (i, len(cv), len(train), len(test)))
                fold_args = (feature_sets, train, test, y, y_all, inputs, dim, dimsum, learn_options) + extra_args
                jobs.append(pool.apply_async(fold_function, args=fold_args))
            pool.close()
            pool.join()
            for i, (train, test) in enumerate(cv):
                y_pred, m[i] = jobs[i].get()
                truth, predictions = _score_fold(learn_options, metrics, fold_labels[i], i, predictions, truth, y_all, test, y_pred)
    else:
        # non parallel version
        for i, (train, test) in enumerate(cv):
            y_pred, m[i] = fold_function(feature_sets, train, test, y, y_all, inputs, dim, dimsum, learn_options, *extra_args)
            truth, predictions = _score_fold(learn_options, metrics, fold_labels[i], i, predictions, truth, y_all, test, y_pred)

            print("\t\tRMSE: ", np.sqrt(((y_pred - y[test])**2).mean()))
            print("\t\tSpearman correlation: ", util.spearmanr_nonan(y[test], y_pred)[0])
            print("\t\tfinished fold/gene %i of %i" % (i+1, len(fold_labels)))

    cv_median_metric = [np.median(metrics)]
    gene_pred = [(truth, predictions)]
    print("\t\tmedian %s across gene folds: %.3f" % (learn_options["training_metric"], cv_median_metric[-1]))

    t3 = time.time()
    print("\t\tElapsed time for cv is %.2f seconds" % (t3-t2))
    return metrics, gene_pred, fold_labels, m, dimsum, filename, feature_names
|
|
Binary file
|
|
|
|
Binary file
|
|
|
|
@@ -0,0 +1,1331 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas
|
| 2 |
+
# import matplotlib.pylab as plt
|
| 3 |
+
# import pylab as pl # so can just grab qqplotting code from fastlmm directly
|
| 4 |
+
import scipy.stats
|
| 5 |
+
import scipy as sp
|
| 6 |
+
import numpy as np
|
| 7 |
+
import itertools
|
| 8 |
+
import sklearn.metrics
|
| 9 |
+
import Bio.SeqUtils.MeltingTemp as Tm
|
| 10 |
+
import Bio.Entrez as Entrez
|
| 11 |
+
import Bio.SeqUtils as SeqUtil
|
| 12 |
+
# from azimuth.features import microhomology
|
| 13 |
+
from Bio import SeqIO
|
| 14 |
+
# from . import metrics as ranking_metrics
|
| 15 |
+
import os
|
| 16 |
+
import pickle
|
| 17 |
+
import glob
|
| 18 |
+
#import azimuth
|
| 19 |
+
# import azimuth.models
|
| 20 |
+
#import azimuth.models.ensembles as ensembles
|
| 21 |
+
import Bio.Seq as Seq
|
| 22 |
+
import time
|
| 23 |
+
import scipy.stats as st
|
| 24 |
+
from . import util
|
| 25 |
+
import sys
|
| 26 |
+
import pandas as pd
|
| 27 |
+
# from . import corrstats
|
| 28 |
+
|
| 29 |
+
# def qqplot(pvals, fileout = None, alphalevel = 0.05,legend=None,xlim=None,ylim=None,fixaxes=True,addlambda=True,minpval=1e-20,title=None,h1=None,figsize=[5,5],grid=True, markersize=2):
|
| 30 |
+
# '''
|
| 31 |
+
# performs a P-value QQ-plot in -log10(P-value) space
|
| 32 |
+
# -----------------------------------------------------------------------
|
| 33 |
+
# Args:
|
| 34 |
+
# pvals P-values, for multiple methods this should be a list (each element will be flattened)
|
| 35 |
+
# fileout if specified, the plot will be saved to the file (optional)
|
| 36 |
+
# alphalevel significance level for the error bars (default 0.05)
|
| 37 |
+
# if None: no error bars are plotted
|
| 38 |
+
# legend legend string. For multiple methods this should be a list
|
| 39 |
+
# xlim X-axis limits for the QQ-plot (unit: -log10)
|
| 40 |
+
# ylim Y-axis limits for the QQ-plot (unit: -log10)
|
| 41 |
+
# fixaxes Makes xlim=0, and ylim=max of the two ylimits, so that plot is square
|
| 42 |
+
# addlambda Compute and add genomic control to the plot, bool
|
| 43 |
+
# title plot title, string (default: empty)
|
| 44 |
+
# h1 figure handle (default None)
|
| 45 |
+
# figsize size of the figure. (default: [5,5])
|
| 46 |
+
# grid boolean: use a grid? (default: True)
|
| 47 |
+
# Returns: fighandle, qnull, qemp
|
| 48 |
+
# -----------------------------------------------------------------------
|
| 49 |
+
# '''
|
| 50 |
+
# distr = 'log10'
|
| 51 |
+
# import pylab as pl
|
| 52 |
+
# if type(pvals)==list:
|
| 53 |
+
# pvallist=pvals
|
| 54 |
+
# else:
|
| 55 |
+
# pvallist = [pvals]
|
| 56 |
+
# if type(legend)==list:
|
| 57 |
+
# legendlist=legend
|
| 58 |
+
# else:
|
| 59 |
+
# legendlist = [legend]
|
| 60 |
+
#
|
| 61 |
+
# if h1 is None:
|
| 62 |
+
# h1=pl.figure(figsize=figsize)
|
| 63 |
+
#
|
| 64 |
+
# pl.grid(b=grid, alpha = 0.5)
|
| 65 |
+
#
|
| 66 |
+
# maxval = 0
|
| 67 |
+
#
|
| 68 |
+
# for i in range(len(pvallist)):
|
| 69 |
+
# pval =pvallist[i].flatten()
|
| 70 |
+
# M = pval.shape[0]
|
| 71 |
+
# pnull = (0.5 + sp.arange(M))/M
|
| 72 |
+
# # pnull = np.sort(np.random.uniform(size = tests))
|
| 73 |
+
#
|
| 74 |
+
# pval[pval<minpval]=minpval
|
| 75 |
+
# pval[pval>=1]=1
|
| 76 |
+
#
|
| 77 |
+
# if distr == 'chi2':
|
| 78 |
+
# qnull = st.chi2.isf(pnull, 1)
|
| 79 |
+
# qemp = (st.chi2.isf(sp.sort(pval),1))
|
| 80 |
+
# xl = 'LOD scores'
|
| 81 |
+
# yl = '$\chi^2$ quantiles'
|
| 82 |
+
#
|
| 83 |
+
# if distr == 'log10':
|
| 84 |
+
# qnull = -sp.log10(pnull)
|
| 85 |
+
# qemp = -sp.log10(sp.sort(pval)) #sorts the object, returns nothing
|
| 86 |
+
# xl = '-log10(P) observed'
|
| 87 |
+
# yl = '-log10(P) expected'
|
| 88 |
+
# if not (sp.isreal(qemp)).all(): raise Exception("imaginary qemp found")
|
| 89 |
+
# if qnull.max>maxval:
|
| 90 |
+
# maxval = qnull.max()
|
| 91 |
+
# pl.plot(qnull, qemp, '.', markersize=markersize)
|
| 92 |
+
# #pl.plot([0,qemp.max()], [0,qemp.max()],'r')
|
| 93 |
+
# if addlambda:
|
| 94 |
+
# lambda_gc = estimate_lambda(pval)
|
| 95 |
+
# print("lambda=%1.4f" % lambda_gc)
|
| 96 |
+
# #pl.legend(["gc="+ '%1.3f' % lambda_gc],loc=2)
|
| 97 |
+
# # if there's only one method, just print the lambda
|
| 98 |
+
# if len(pvallist) == 1:
|
| 99 |
+
# legendlist=["$\lambda_{GC}=$%1.4f" % lambda_gc]
|
| 100 |
+
# # otherwise add it at the end of the name
|
| 101 |
+
# else:
|
| 102 |
+
# legendlist[i] = legendlist[i] + " ($\lambda_{GC}=$%1.4f)" % lambda_gc
|
| 103 |
+
#
|
| 104 |
+
# addqqplotinfo(qnull,M,xl,yl,xlim,ylim,alphalevel,legendlist,fixaxes)
|
| 105 |
+
#
|
| 106 |
+
# if title is not None:
|
| 107 |
+
# pl.title(title)
|
| 108 |
+
#
|
| 109 |
+
# if fileout is not None:
|
| 110 |
+
# pl.savefig(fileout)
|
| 111 |
+
#
|
| 112 |
+
# return h1,qnull, qemp,
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
# def qqplotp(pv,fileout = None, alphalevel = 0.05,legend=None,xlim=None,ylim=None,ycoord=10,plotsize="652x526",title=None,dohist=True, numbins=50, figsize=[5,5], markersize=2):
|
| 116 |
+
# '''
|
| 117 |
+
# Read in p-values from filein and make a qqplot and histogram.
|
| 118 |
+
# If fileout is provided, saves the qqplot only at present.
|
| 119 |
+
# Searches through p until one is found. '''
|
| 120 |
+
#
|
| 121 |
+
# import pylab as pl
|
| 122 |
+
# pl.ion()
|
| 123 |
+
#
|
| 124 |
+
# fs=8
|
| 125 |
+
# h1=qqplot(pv, fileout, alphalevel,legend,xlim,ylim,addlambda=True, figsize=figsize, markersize=markersize)
|
| 126 |
+
# #lambda_gc=estimate_lambda(pv)
|
| 127 |
+
# #pl.legend(["gc="+ '%1.3f' % lambda_gc],loc=2)
|
| 128 |
+
# pl.title(title,fontsize=fs)
|
| 129 |
+
#
|
| 130 |
+
# wm=pl.get_current_fig_manager()
|
| 131 |
+
# #e.g. "652x526+100+10
|
| 132 |
+
# xcoord=100
|
| 133 |
+
# #wm.window.wm_geometry(plotsize + "+" + str(xcoord) + "+" + str(ycoord))
|
| 134 |
+
#
|
| 135 |
+
# if dohist:
|
| 136 |
+
# h2=pvalhist(pv, numbins=numbins, figsize=figsize)
|
| 137 |
+
# pl.title(title,fontsize=fs)
|
| 138 |
+
# #wm=pl.get_current_fig_manager()
|
| 139 |
+
# width_height=plotsize.split("x")
|
| 140 |
+
# buffer=10
|
| 141 |
+
# xcoord=int(xcoord + float(width_height[0])+buffer)
|
| 142 |
+
# #wm.window.wm_geometry(plotsize + "+" + str(xcoord) + "+" + str(ycoord))
|
| 143 |
+
# else: h2=None
|
| 144 |
+
#
|
| 145 |
+
# return h1,h2
|
| 146 |
+
|
| 147 |
+
# def addqqplotinfo(qnull,M,xl='-log10(P) observed',yl='-log10(P) expected',xlim=None,ylim=None,alphalevel=0.05,legendlist=None,fixaxes=False):
|
| 148 |
+
# distr='log10'
|
| 149 |
+
# pl.plot([0,qnull.max()], [0,qnull.max()],'k')
|
| 150 |
+
# pl.ylabel(xl)
|
| 151 |
+
# pl.xlabel(yl)
|
| 152 |
+
# if xlim is not None:
|
| 153 |
+
# pl.xlim(xlim)
|
| 154 |
+
# if ylim is not None:
|
| 155 |
+
# pl.ylim(ylim)
|
| 156 |
+
# if alphalevel is not None:
|
| 157 |
+
# if distr == 'log10':
|
| 158 |
+
# betaUp, betaDown, theoreticalPvals = _qqplot_bar(M=M,alphalevel=alphalevel,distr=distr)
|
| 159 |
+
# lower = -sp.log10(theoreticalPvals-betaDown)
|
| 160 |
+
# upper = -sp.log10(theoreticalPvals+betaUp)
|
| 161 |
+
# pl.fill_between(-sp.log10(theoreticalPvals),lower,upper,color="grey",alpha=0.5)
|
| 162 |
+
# #pl.plot(-sp.log10(theoreticalPvals),lower,'g-.')
|
| 163 |
+
# #pl.plot(-sp.log10(theoreticalPvals),upper,'g-.')
|
| 164 |
+
# if legendlist is not None:
|
| 165 |
+
# leg = pl.legend(legendlist, loc=4, numpoints=1)
|
| 166 |
+
# # set the markersize for the legend
|
| 167 |
+
# for lo in leg.legendHandles:
|
| 168 |
+
# lo.set_markersize(10)
|
| 169 |
+
#
|
| 170 |
+
# if fixaxes:
|
| 171 |
+
# fix_axes()
|
| 172 |
+
|
| 173 |
+
def _qqplot_bar(M=1000000, alphalevel=0.05, distr='log10'):
    '''
    calculate error bars for a QQ-plot
    --------------------------------------------------------------------
    Input:
    -------------   ----------------------------------------------------
    M               number of points to compute error bars
    alphalevel      significance level for the error bars (default 0.05)
    distr           space in which the error bars are implemented
                    Note only log10 is implemented (default 'log10');
                    the argument is not consulted by the computation
    --------------------------------------------------------------------
    Returns:
    -------------   ----------------------------------------------------
    betaUp          upper error bars
    betaDown        lower error bars
    theoreticalPvals    theoretical P-values under uniform
    --------------------------------------------------------------------
    '''
    # assumes 'log10'

    # Rank positions m, evenly spaced in log10 from 0.5 in steps of 0.1.
    # NumPy is used directly: the scipy.arange/log10/zeros aliases are deprecated.
    mRange = 10 ** np.arange(np.log10(0.5), np.log10(M - 0.5) + 0.1, 0.1)

    # Quantiles of Beta(m, M - m) at the median and at both tails, evaluated
    # for every m at once instead of one point per loop iteration.
    betaInvHalf = st.beta.ppf(0.5, mRange, M - mRange)
    betaalphaLevel = st.beta.ppf(alphalevel, mRange, M - mRange)  # down in the plot
    betaOneMinusalphaLevel = st.beta.ppf(1 - alphalevel, mRange, M - mRange)  # up in the plot

    betaDown = betaInvHalf - betaalphaLevel
    betaUp = betaOneMinusalphaLevel - betaInvHalf

    theoreticalPvals = mRange / M
    return betaUp, betaDown, theoreticalPvals
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
# def fix_axes(buffer=0.1):
|
| 215 |
+
# '''
|
| 216 |
+
# Makes x and y max the same, and the lower limits 0.
|
| 217 |
+
# '''
|
| 218 |
+
# maxlim=max(pl.xlim()[1],pl.ylim()[1])
|
| 219 |
+
# pl.xlim([0-buffer,maxlim+buffer])
|
| 220 |
+
# pl.ylim([0-buffer,maxlim+buffer])
|
| 221 |
+
|
| 222 |
+
def estimate_lambda(pv):
    '''
    estimate the lambda for a given array of P-values
    ------------------------------------------------------------------
    pv          numpy array containing the P-values
    ------------------------------------------------------------------
    L           lambda value
    ------------------------------------------------------------------
    '''
    # Convert P-values to chi-square (1 d.o.f.) statistics and take their
    # median.  np.median replaces the deprecated scipy-namespace alias
    # sp.median.
    LOD2 = np.median(st.chi2.isf(pv, 1))
    # 0.456 is (approximately) the median of a chi-square distribution with
    # one degree of freedom, so L is close to 1 when the median P-value is 0.5.
    L = LOD2 / 0.456
    return L
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
# def pvalhist(pv,numbins=50,linewidth=3.0,linespec='--r', figsize=[5,5]):
|
| 237 |
+
# '''
|
| 238 |
+
# Plots normalized histogram, plus theoretical null-only line.
|
| 239 |
+
# '''
|
| 240 |
+
# h2=pl.figure(figsize=figsize)
|
| 241 |
+
# [nn,bins,patches]=pl.hist(pv,numbins,normed=True)
|
| 242 |
+
# pl.plot([0, 1],[1,1],linespec,linewidth=linewidth)
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
def get_pval_from_predictions(m0_predictions, m1_predictions, ground_truth, twotailed=False, method='steiger'):
    '''
    Compare how well two sets of predictions rank-correlate with the same ground truth.

    Spearman correlations are computed for (m0, truth), (m1, truth) and (m0, m1),
    and handed to corrstats.dependent_corr together with the sample size.

    If twotailed==False, then need to check that the one of corr0 and corr1 that is higher is the correct one

    Returns: t2, pv, corr0, corr1, corr01
    '''
    from . import corrstats

    # All three inputs must describe the same samples.
    lengths = [len(values) for values in (m0_predictions, m1_predictions, ground_truth)]
    sample_size = lengths[0]
    assert sample_size == lengths[1]
    assert sample_size == lengths[2]

    corr0, _ = scipy.stats.spearmanr(m0_predictions, ground_truth)
    corr1, _ = scipy.stats.spearmanr(m1_predictions, ground_truth)
    corr01, _ = scipy.stats.spearmanr(m0_predictions, m1_predictions)

    t2, pv = corrstats.dependent_corr(corr0, corr1, corr01, sample_size,
                                      twotailed=twotailed, method=method)
    return t2, pv, corr0, corr1, corr01
|
| 261 |
+
|
| 262 |
+
def get_thirty_one_mer_data():
    '''
    Load up our processed data file for all of V1 and V2, make a 31mer so that
    we can use the SSC trained model to compare to.

    Reads the input CSV, adds a "31mer" column built row by row with
    convert_to_thirty_one from the "30mer", "Target" and "Strand" columns,
    and writes the result to a second CSV.  Nothing is returned.

    Assumes we call this from the analysis subdirectory
    (the paths are relative and use Windows separators).
    '''
    source_csv = r"..\data\FC_RES_5304.csv"
    target_csv = r"..\data\FC_RES_5304_w_31mer.csv"

    data = pd.read_csv(source_csv)
    rows = (data.iloc[i] for i in range(data.shape[0]))
    data["31mer"] = [
        convert_to_thirty_one(row["30mer"], row["Target"], row["Strand"])
        for row in rows
    ]
    data.to_csv(target_csv)
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
def guide_positional_features(guide_seq, gene, strand):
    """
    Given a guide sequence, a gene name, and strand (e.g. "sense"), return the (absolute) nucleotide cut position, and the percent amino acid.
    From John's email:
    the cut site is always 3nts upstream of the NGG PAM:
    5' - 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 <cut> 18 19 20 N G G - 3'
    To calculate percent protein, we determined what amino acid number was being cut and just divided by the total number of amino acids. In the case where the cutsite was between two amino acid codons, I believe we rounded down

    NOTE: the computation itself is not implemented yet.  The function
    returns "" when the guide cannot be located in the gene, and otherwise
    raises NotImplementedError.
    """

    guide_seq = Seq.Seq(guide_seq)
    # NOTE(review): the sibling convert_to_thirty_one calls get_gene_sequence
    # directly rather than through `util` -- confirm which spelling is intended.
    gene_seq = Seq.Seq(util.get_gene_sequence(gene)).reverse_complement()
    if strand == 'sense':
        guide_seq = guide_seq.reverse_complement()
    ind = gene_seq.find(guide_seq)
    if ind == -1:
        # The message now matches the value that is actually returned.
        print("returning '', could not find guide %s in gene %s" % (guide_seq, gene))
        return ""
    assert gene_seq[ind:(ind + len(guide_seq))] == guide_seq, "match not right"
    # A leftover ipdb breakpoint used to sit here; it would block (or fail to
    # import) in any non-interactive run, so it has been removed.
    raise NotImplementedError("incomplete implementation for now")
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
def convert_to_thirty_one(guide_seq, gene, strand):
    '''
    Given a guide sequence, a gene name, and strand (e.g. "sense"), return a 31mer string which is our 30mer,
    plus one more at the end.

    The guide is searched for in the reverse complement of the gene sequence
    (after reverse-complementing the guide itself for the "sense" strand).
    '''
    on_sense_strand = strand == 'sense'

    guide = Seq.Seq(guide_seq)
    gene_rc = Seq.Seq(get_gene_sequence(gene)).reverse_complement()
    if on_sense_strand:
        guide = guide.reverse_complement()

    start = gene_rc.find(guide)
    if start == -1:
        print("returning sequence+'A', could not find guide %s in gene %s" % (guide, gene))
        # NOTE(review): this returns the whole reverse-complemented gene plus
        # 'A' (not the guide plus 'A', and not a str) -- confirm this is intended.
        return gene_rc + 'A'

    stop = start + len(guide)
    assert gene_rc[start:stop] == guide, "match not right"

    # Extending by one base *before* the match looks wrong, but is right due to
    # the strand frame-of-reference (extending after the match is what is wrong).
    # NOTE(review): a match at position 0 makes the slice start at -1 -- confirm.
    extended = gene_rc[(start - 1):stop]
    if on_sense_strand:
        extended = extended.reverse_complement()
    return str(extended)
|
| 321 |
+
|
| 322 |
+
def concatenate_feature_sets(feature_sets, keys=None):
    '''
    Given a dictionary of sets of features, each in a Pandas.DataFrame,
    concatenate them together to form one big np.array, and get the dimension
    of each set

    feature_sets    dict mapping a set name to a DataFrame; every DataFrame
                    must have the same number of rows
    keys            names (and order) of the sets to concatenate; defaults to
                    all keys of feature_sets

    Returns: inputs, dim, dimsum, feature_names
        inputs          2-D array with the selected sets side by side
        dim             dict mapping set name to its number of columns
        dimsum          total number of columns
        feature_names   column names, in the same order as the columns of inputs
    '''
    assert feature_sets != {}, "no feature sets present"
    if keys is None:
        keys = list(feature_sets.keys())

    # Every feature set (not only the selected ones) must have the same rows.
    n_rows = feature_sets[keys[0]].shape[0]
    for name in feature_sets:
        assert n_rows == feature_sets[name].shape[0], \
            "not same # individuals for features %s and %s" % (keys[0], name)

    # Start from an empty float block so the stacked result has the same
    # dtype as before, then stack everything in one call instead of
    # re-allocating the growing matrix on every iteration.
    blocks = [np.zeros((n_rows, 0))]
    feature_names = []
    dim = {}
    dimsum = 0
    for name in keys:
        values = feature_sets[name].values
        dim[name] = values.shape[1]
        dimsum += dim[name]
        blocks.append(values)
        feature_names.extend(feature_sets[name].columns.tolist())
    inputs = np.hstack(blocks)

    return inputs, dim, dimsum, feature_names
|
| 357 |
+
|
| 358 |
+
def extract_individual_level_data(one_result):
    '''
    Extract predictions and truth for each fold
    Returns: ranks, predictions  (each a single 1-D array, folds in fold_labels order)

    assumes that results here is the value for a results dictionary for one key, i.e. one entry in a dictionary loaded up from saved results with pickle
    e.g. all_results, all_learn_options = pickle.load(some_results_file)
    then call extract_individual_level_data(one_results = all_results['firstkey'])
    then, one_results contains: metrics, gene_pred, fold_labels, m, dimsum, filename, feature_names
    '''
    # Only gene_pred and fold_labels are read; the unpacking still enforces
    # that the result has exactly seven entries.
    _, gene_pred, fold_labels, _, _, _, _ = one_result

    rank_chunks = [np.empty(0)]
    pred_chunks = [np.empty(0)]
    for fold in list(fold_labels):
        rank_chunks.append(gene_pred[0][0][fold]['ranks'])  # similar for thrs
        pred_chunks.append(gene_pred[0][1][fold])

    return np.concatenate(rank_chunks), np.concatenate(pred_chunks)
|
| 377 |
+
|
| 378 |
+
def spearmanr_nonan(x,y):
    '''
    same as scipy.stats.spearmanr, but if either input holds a single distinct
    value (so the result is nan), returns 0 for both rho and pval instead of nan
    (Output: rho, pval)

    Raises Exception when the p-value is nan for any other reason.
    '''
    r, p = st.spearmanr(x, y)

    if not np.isnan(p):
        assert not np.isnan(r)
        return r, p

    constant_input = len(np.unique(x)) == 1 or len(np.unique(y)) == 1
    if not constant_input:
        raise Exception("found nan spearman")

    print("WARNING: spearmanr is nan due to unique values, setting to 0")
    return 0.0, 0.0
|
| 393 |
+
|
| 394 |
+
|
| 395 |
+
|
| 396 |
+
def impute_gene_position(gene_position):
    '''
    Some amino acid cut position and percent peptide are blank because of stop codons, but
    we still want a number for these.

    Missing 'Percent Peptide' values are set to 101 as a proxy; missing
    'Amino Acid Cut position' values (when that column exists) are set to the
    column mean.  The DataFrame is modified in place and also returned.
    '''
    peptide_col = 'Percent Peptide'
    cut_col = 'Amino Acid Cut position'

    gene_position[peptide_col] = gene_position[peptide_col].fillna(101.00)

    if cut_col in gene_position.columns:
        cut_positions = gene_position[cut_col]
        gene_position[cut_col] = cut_positions.fillna(cut_positions.mean())

    return gene_position
|
| 408 |
+
|
| 409 |
+
|
| 410 |
+
def datestamp(appendrandom=False):
    '''
    Return the current local time as "YYYY-MM-DD_HH_MM_SS".

    appendrandom    when True, append "_" followed by the digits of a
                    random float, to make the stamp (very likely) unique
    '''
    import datetime

    stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
    if appendrandom:
        import random
        # str(random.random()) looks like "0.123..."; drop the leading "0."
        stamp = "_".join((stamp, str(random.random())[2:]))
    return stamp
|
| 418 |
+
|
| 419 |
+
|
| 420 |
+
def get_gene_sequence(gene_name):
    '''
    Read the sequence of a gene from
    '../../gene_sequences/<gene_name>_sequence.txt' (relative to the current
    working directory) and return it as a str with all '\\r\\n' line breaks
    removed.

    Raises Exception when the file cannot be read.
    '''
    gene_file = '../../gene_sequences/%s_sequence.txt' % gene_name
    #gene_file = '../gene_sequences/%s_sequence.txt' % gene_name
    #gene_file = 'gene_sequences/%s_sequence.txt' % gene_name
    try:
        # Text mode with newline='' keeps '\r\n' untouched so it can be
        # stripped below.  The previous version read bytes and then called
        # .replace() with str arguments, which raises TypeError on Python 3;
        # the bare except turned that into a misleading "could not find"
        # error for every gene.
        with open(gene_file, 'r', newline='', encoding='utf-8') as f:
            seq = f.read()
    except (OSError, UnicodeError) as err:
        raise Exception("could not find gene sequence file %s, please see examples and generate one for your gene as needed, with this filename" % gene_file) from err

    return seq.replace('\r\n', '')
|
| 432 |
+
|
| 433 |
+
# gene_positions = {'CCDC101': [28553928,28591790]}
|
| 434 |
+
# search = Entrez.esearch(db="gene", term='%s[Gene Name] AND Homo Sapiens[Organism]' % (gene_name))
|
| 435 |
+
# records = Entrez.read(search)
|
| 436 |
+
|
| 437 |
+
# if len(records['IdList']) > 1:
|
| 438 |
+
# print "warning, multiple hits found for entrez gene search %s" % gene_name
|
| 439 |
+
|
| 440 |
+
# elink = Entrez.read(Entrez.elink(dbfrom="gene", db='nucleotide', id=records['IdList'][0]))
|
| 441 |
+
# nucl_id = elink[0]['LinkSetDb'][3]
|
| 442 |
+
|
| 443 |
+
# cut = False
|
| 444 |
+
# if nucl_id['LinkName'] != 'gene_nuccore_refseqgene':
|
| 445 |
+
# if gene_name in gene_positions.keys():
|
| 446 |
+
# nucl_id = elink[0]['LinkSetDb'][0]['Link'][0]['Id']
|
| 447 |
+
# cut = True
|
| 448 |
+
# else:
|
| 449 |
+
# print "sorry not enough information to return sequence"
|
| 450 |
+
# return None
|
| 451 |
+
# else:
|
| 452 |
+
# nucl_id = nucl_id['Link'][0]['Id']
|
| 453 |
+
|
| 454 |
+
# handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
|
| 455 |
+
# record = SeqIO.read(handle, "genbank")
|
| 456 |
+
# handle.close()
|
| 457 |
+
|
| 458 |
+
# if cut:
|
| 459 |
+
# start, end = gene_positions[gene_name]
|
| 460 |
+
# return str(record.seq)[start:end]
|
| 461 |
+
# else:
|
| 462 |
+
# return str(record.seq)
|
| 463 |
+
|
| 464 |
+
|
| 465 |
+
def target_genes_stats(genes=('HPRT1', 'TADA1', 'NF2', 'TADA2B', 'NF1', 'CUL3', 'MED12', 'CCDC101')):
    '''
    Print one line per gene with the length, GC content, melting temperature
    and molecular weight of its sequence (as loaded by get_gene_sequence).

    genes   iterable of gene names; the default is an immutable tuple so it
            cannot be altered between calls
    '''
    for gene in genes:
        seq = get_gene_sequence(gene)
        if seq is not None:
            print('%s \t\t\t\t len: %d \t GCcont: %.3f \t Temp: %.4f \t molweight: %.4f' % (gene, len(seq), SeqUtil.GC(seq), Tm.Tm_staluc(seq, rna=False), SeqUtil.molecular_weight(seq, 'DNA')))
|
| 470 |
+
|
| 471 |
+
|
| 472 |
+
def ranktrafo(data):
    '''
    Rank-based transform of the values in `data`.

    Each value is replaced by sqrt(2) * erfinv(2*r - 1), where r is its rank
    (ties averaged, as in scipy.stats.rankdata) divided by (n + 1).

    data    object exposing `.values`
            (assumes a 1-D pandas Series -- TODO confirm against callers)

    Returns a flat float numpy array with one transformed value per input value.
    '''
    X = data.values[:, None]
    # Always allocate floats: zeros_like(X) would inherit an integer dtype
    # from integer input and silently truncate the transformed values.
    RV = np.zeros(X.shape, dtype=float)
    for i in range(X.shape[1]):
        # Dividing by n + 1 keeps the scaled ranks strictly inside (0, 1),
        # so erfinv never returns +/- infinity.
        rank = sp.stats.rankdata(X[:, i]) / (X.shape[0] + 1)
        RV[:, i] = np.sqrt(2) * sp.special.erfinv(2 * rank - 1)

    return RV.flatten()
|
| 484 |
+
|
| 485 |
+
def get_ranks(y, thresh=0.8, prefix="", flip=False, col_name='score'):
    """
    y should be a DataFrame with one column
    thresh is the threshold at which to call it a knock-down or not
    prefix is prepended (followed by "_") to every output column name;
    None means no prefix at all
    col_name = 'score' is only for V2 data (it is not read by this function)
    flip should be FALSE for both V1 and V2!

    Returns: y_rank, y_rank_raw, y_threshold, y_quantized
        y_rank        ranks of y scaled by the largest rank
        y_rank_raw    the same, computed on 2**y
        y_threshold   1 where y_rank > thresh, else 0
        y_quantized   copy of y_threshold under a different column name
    """

    # prefix=None used to crash on None + "rank"; treat it as "no prefix".
    # Any string prefix, including the default "", still gets a trailing "_".
    prefix = "" if prefix is None else prefix + "_"

    rankdata = scipy.stats.mstats.rankdata

    y_rank = y.apply(rankdata)
    y_rank /= y_rank.max()

    if flip:
        y_rank = 1.0 - y_rank # before this line, 1-labels where associated with low ranks, this flips it around (hence the y_rank > thresh below)
        # we should NOT flip (V2), see README.txt in ./data

    y_rank.columns = [prefix + "rank"]
    y_threshold = (y_rank > thresh) * 1
    y_threshold.columns = [prefix + "threshold"]

    # JL: undo the log2 transform (not sure this matters?)
    y_rank_raw = (2 ** y).apply(rankdata)
    y_rank_raw /= y_rank_raw.max()
    if flip:
        y_rank_raw = 1.0 - y_rank_raw
    y_rank_raw.columns = [prefix + "rank raw"]

    # `~` on a plain Python bool yields -1/-2, both truthy, so the previous
    # `assert ~np.any(...)` could never fail in that case; use `not` on an
    # explicit array reduction instead.
    assert not np.isnan(y_rank.values).any(), "found NaN ranks"

    # divides into quantiles, but not used:
    # y_quantized = pandas.DataFrame(data=pandas.qcut(y[col_name], 5, labels=np.arange(5.0))) # quantized vector
    y_quantized = y_threshold.copy()
    y_quantized.columns = [prefix + "quantized"]

    return y_rank, y_rank_raw, y_threshold, y_quantized
|
| 523 |
+
|
| 524 |
+
def get_data(data, y_names, organism="human", target_gene=None):
    '''
    this is called once for each gene (aggregating across cell types)
    y_names are cell types
    e.g. call: X_CD13, Y_CD13 = get_data(cd13, y_names=['NB4 CD13', 'TF1 CD13'])

    data            DataFrame with one column per entry of y_names plus
                    '30mer' and 'Strand' columns
    organism        stored in the 'Organism' column of both outputs
    target_gene     stored in the 'Target gene' column; for organism "human"
                    it is replaced by the second space-separated token of
                    y_names[0]

    Returns: features, outputs
        features    '30mer', 'Target gene', 'Organism' and 'Strand' columns
        outputs     per cell type: the raw values, rank, threshold and raw
                    rank; plus the averages across cell types and the
                    'Target gene' / 'Organism' columns
    '''
    outputs = pandas.DataFrame()

    #generate ranks for each cell type before aggregating to match what is in Doench et al
    thresh = 0.8
    for y_name in y_names: # for each cell type
        y = pandas.DataFrame(data[y_name])
        # these thresholds/quantils are not used:
        y_rank, y_rank_raw, y_threshold, y_quantiles = get_ranks(y, thresh=thresh, flip=False, col_name=y_name)
        y_rank.columns = [y_name + " rank"]
        y_rank_raw.columns = [y_name + " rank raw"]
        y_threshold.columns = [y_name + " threshold"]

        outputs = pandas.concat([outputs, y, y_rank, y_threshold, y_rank_raw], axis=1)

    #aggregated rank across cell types
    average_activity = pandas.DataFrame(outputs[list(y_names)].mean(axis=1))
    average_activity.columns = ['average activity']

    average_rank_from_avg_activity = get_ranks(average_activity, thresh=thresh, flip=False, col_name='average activity')[0]
    average_rank_from_avg_activity.columns = ['average_rank_from_avg_activity']
    average_threshold_from_avg_activity = (average_rank_from_avg_activity > thresh) * 1
    average_threshold_from_avg_activity.columns = ['average_threshold_from_avg_activity']

    average_rank = pandas.DataFrame(outputs[[y_name + ' rank' for y_name in y_names]].mean(axis=1))
    average_rank.columns = ['average rank']
    # higher ranks are better (when flip=False as it should be)
    average_threshold = (average_rank > thresh) * 1
    average_threshold.columns = ['average threshold']

    # undo the log2 trafo on the reads per million, apply rank trafo right away
    average_rank_raw = pandas.DataFrame(outputs[[y_name + ' rank raw' for y_name in y_names]].mean(axis=1))
    average_rank_raw.columns = ['average rank raw']
    outputs = pandas.concat([outputs, average_rank, average_threshold, average_activity, average_rank_raw, average_rank_from_avg_activity, average_threshold_from_avg_activity], axis=1)

    #sequence-specific computations
    #features = featurize_data(data)
    #strip out featurization to later
    features = pandas.DataFrame(data['30mer'])

    if organism == "human":
        target_gene = y_names[0].split(' ')[1]

    outputs['Target gene'] = target_gene
    outputs['Organism'] = organism

    features['Target gene'] = target_gene
    features['Organism'] = organism
    features['Strand'] = pandas.DataFrame(data['Strand'])

    return features, outputs
|
| 583 |
+
|
| 584 |
+
|
| 585 |
+
# def plot_metrics(metrics, truth_and_predictions, target_genes, run_label, color=None, filename_prefix=None, learn_options=None):
|
| 586 |
+
#
|
| 587 |
+
# if learn_options["metric"] == 'AUC':
|
| 588 |
+
# best = truth_and_predictions[0]#[np.argmax(cv_scores)]
|
| 589 |
+
# plt.figure('ROC per gene')
|
| 590 |
+
# plt.figure('global ROC')
|
| 591 |
+
# plt.figure('AUC ROC per gene')
|
| 592 |
+
#
|
| 593 |
+
# all_truth = np.array([])
|
| 594 |
+
# all_predictions = np.array([])
|
| 595 |
+
# AUCs = []
|
| 596 |
+
# AUCs_labels = []
|
| 597 |
+
# for i, gene in enumerate(target_genes):
|
| 598 |
+
# if len(best[1][gene])==0:
|
| 599 |
+
# continue
|
| 600 |
+
# plt.figure('ROC per gene')
|
| 601 |
+
# plt.subplot(331+i)
|
| 602 |
+
# fpr, tpr, _ = sklearn.metrics.roc_curve(best[0][gene], best[1][gene])
|
| 603 |
+
# np.savetxt('../results/%s_ROC.txt' % gene, np.hstack((fpr[:, None], tpr[:, None])))
|
| 604 |
+
#
|
| 605 |
+
# roc_auc = sklearn.metrics.auc(fpr, tpr)
|
| 606 |
+
# AUCs.append(roc_auc)
|
| 607 |
+
# AUCs_labels.append(gene)
|
| 608 |
+
# plt.plot(fpr, tpr, label=run_label)
|
| 609 |
+
# plt.title(gene)
|
| 610 |
+
# h1 = plt.figure('global ROC')
|
| 611 |
+
# plt.plot(fpr, tpr, color=color, alpha=.2, linewidth=2.)
|
| 612 |
+
#
|
| 613 |
+
# all_truth = np.hstack((all_truth, best[0][gene]))
|
| 614 |
+
# all_predictions = np.hstack((all_predictions, best[1][gene]))
|
| 615 |
+
#
|
| 616 |
+
# plt.legend(loc=0)
|
| 617 |
+
#
|
| 618 |
+
# plt.figure('AUC ROC per gene')
|
| 619 |
+
# ax = plt.subplot(111)
|
| 620 |
+
# rect = ax.bar(list(range(len(AUCs))), AUCs, width=0.8)
|
| 621 |
+
# autolabel(ax,rect)
|
| 622 |
+
#
|
| 623 |
+
# ax.set_ylim((0.5, 1.0))
|
| 624 |
+
# ax.set_ylabel('AUC ROC')
|
| 625 |
+
# ax.set_xticks(np.array(list(range(len(AUCs)))) + 0.8 / 2)
|
| 626 |
+
# ax.set_xticklabels([t for t in AUCs_labels])
|
| 627 |
+
#
|
| 628 |
+
# fpr, tpr, _ = sklearn.metrics.roc_curve(all_truth, all_predictions)
|
| 629 |
+
# roc_auc = sklearn.metrics.auc(fpr, tpr)
|
| 630 |
+
# #print run_label, roc_auc
|
| 631 |
+
# plt.figure('global ROC')
|
| 632 |
+
# plt.plot(fpr, tpr, label=run_label + " AUC=%.2f" % roc_auc, color=color, linewidth=2.)
|
| 633 |
+
# plt.legend(loc=0)
|
| 634 |
+
# plt.xlabel('False Positive Rate')
|
| 635 |
+
# plt.ylabel('True Positive Rate')
|
| 636 |
+
# #np.savetxt('../results/global_ROC.txt', np.hstack((fpr[:, None], tpr[:, None])))
|
| 637 |
+
# #np.savetxt('../results/AUCs.txt', np.hstack((np.array([t for t in target_genes])[:, None], np.array(AUCs)[:, None])), fmt='%s')
|
| 638 |
+
#
|
| 639 |
+
# if filename_prefix != None:
|
| 640 |
+
# plt.figure('global ROC')
|
| 641 |
+
# plt.savefig(filename_prefix+'globalROC.png')
|
| 642 |
+
#
|
| 643 |
+
# plt.figure('ROC per gene')
|
| 644 |
+
# plt.savefig(filename_prefix+'ROC_per_gene.png')
|
| 645 |
+
#
|
| 646 |
+
# plt.figure('AUC ROC per gene')
|
| 647 |
+
# plt.savefig(filename_prefix+'AUCROC_barplot.png')
|
| 648 |
+
# return roc_auc
|
| 649 |
+
# else:
|
| 650 |
+
# plt.figure('NDCG per gene')
|
| 651 |
+
# ax = plt.subplot(111)
|
| 652 |
+
# rect = ax.bar(list(range(len(metrics))), metrics, width=0.8)
|
| 653 |
+
# autolabel(ax,rect)
|
| 654 |
+
# ax.set_ylim((0.0, 1.2))
|
| 655 |
+
# ax.set_ylabel('NDCG')
|
| 656 |
+
# ax.set_xticks(np.array(list(range(len(metrics)))) + 0.8 / 2)
|
| 657 |
+
# ax.set_xticklabels([t for t in target_genes])
|
| 658 |
+
#
|
| 659 |
+
# truth, predictions = truth_and_predictions[0]
|
| 660 |
+
# all_truth = np.array([])
|
| 661 |
+
# all_predictions = np.array([])
|
| 662 |
+
#
|
| 663 |
+
# for i, gene in enumerate(target_genes):
|
| 664 |
+
# if len(predictions[gene])==0:
|
| 665 |
+
# continue
|
| 666 |
+
#
|
| 667 |
+
# all_truth = np.hstack((all_truth, truth[gene]))
|
| 668 |
+
# all_predictions = np.hstack((all_predictions, predictions[gene]))
|
| 669 |
+
#
|
| 670 |
+
# sorted = all_predictions[np.argsort(all_truth).flatten()[::-1]]
|
| 671 |
+
# sortedgt = np.sort(all_truth).flatten()[::-1]
|
| 672 |
+
# NDCG_total = ranking_metrics.ndcg_at_k_custom_n(sorted, learn_options["NDGC_k"], sortedgt)
|
| 673 |
+
#
|
| 674 |
+
# if filename_prefix != None:
|
| 675 |
+
# plt.figure('NDCG per gene')
|
| 676 |
+
# plt.savefig(filename_prefix+'NDCG_barplot.png')
|
| 677 |
+
#
|
| 678 |
+
# return NDCG_total
|
| 679 |
+
|
| 680 |
+
def autolabel(ax, rects, strfrm='%.2f'):
    '''
    Automatically add value over each bar in bar chart
    http://matplotlib.org/1.4.2/examples/api/barchart_demo.html

    ax      object with a text(x, y, s, **kwargs) method
    rects   iterable of bars exposing get_height(), get_x() and get_width()
    strfrm  %-format applied to each bar height
    '''
    text_style = {'ha': 'center', 'va': 'bottom'}
    for bar in rects:
        bar_height = bar.get_height()
        x_center = bar.get_x() + bar.get_width() / 2.
        # Place the label slightly (5%) above the top of the bar.
        ax.text(x_center, 1.05 * bar_height, strfrm % float(bar_height), **text_style)
|
| 689 |
+
|
| 690 |
+
|
| 691 |
+
def create_cachedir(dirname='./cache/default'):
    '''
    Make sure the path `dirname` exists, creating the directory (and any
    missing parents) when it does not, and return `dirname`.
    '''
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    return dirname
|
| 697 |
+
|
| 698 |
+
def dcg(relevances, rank=20):
    '''
    Discounted cumulative gain of the first `rank` relevances.

    The i-th relevance (0-based) is divided by log2(i + 2) and the results
    are summed.  Returns 0. when there are no relevances.
    '''
    top = np.asarray(relevances)[:rank]
    count = len(top)
    if not count:
        return 0.
    discounts = np.log2(np.arange(2, count + 2))
    return np.sum(top / discounts)
|
| 705 |
+
|
| 706 |
+
def ndcgk(relevances, rank=20):
    '''
    Normalized DCG at `rank`: the DCG of `relevances` in the given order
    divided by the DCG of the same relevances sorted in decreasing order.
    Returns 0. when that best achievable DCG is 0.
    '''
    ideal_order = sorted(relevances, reverse=True)
    best_dcg = dcg(ideal_order, rank)
    return 0. if best_dcg == 0 else dcg(relevances, rank) / best_dcg
|
| 711 |
+
|
| 712 |
+
def extract_feature_from_model(method, results, split):
    '''
    Return the per-feature importances of the model fitted for one split,
    as a column vector.

    method      key into results
    results     results dictionary; results[method][3][split] is the fitted model
    split       key of the split

    ElasticNet models contribute their coef_, GradientBoostingRegressor
    models their feature_importances_.  Any other model raises Exception.
    '''
    model = results[method][3][split]
    # Use the public class path: the private module
    # sklearn.linear_model.coordinate_descent no longer exists in current
    # scikit-learn releases.
    if isinstance(model, sklearn.linear_model.ElasticNet):
        tmp_imp = model.coef_[:, None]
    elif isinstance(model, sklearn.ensemble.GradientBoostingRegressor):
        tmp_imp = model.feature_importances_[:, None]
    else:
        raise Exception("need to add model %s to feature extraction" % model)
    return tmp_imp
|
| 721 |
+
|
| 722 |
+
def extract_feature_from_model_sum(method, results, split, indexes):
    '''
    Return the summed importance of a group of features for the model fitted
    for one split.

    method      key into results
    results     results dictionary; results[method][3][split] is the fitted model
    split       key of the split
    indexes     positions of the features to sum over

    ElasticNet models contribute their coef_, GradientBoostingRegressor
    models their feature_importances_.  Any other model raises Exception.
    '''
    model = results[method][3][split]
    # Use the public class path: the private module
    # sklearn.linear_model.coordinate_descent no longer exists in current
    # scikit-learn releases.
    if isinstance(model, sklearn.linear_model.ElasticNet):
        tmp_imp = np.sum(model.coef_[indexes])
    elif isinstance(model, sklearn.ensemble.GradientBoostingRegressor):
        tmp_imp = np.sum(model.feature_importances_[indexes])
    else:
        raise Exception("need to add model %s to feature extraction" % model)
    return tmp_imp
|
| 731 |
+
|
| 732 |
+
def feature_importances(results, fontsize=16, figsize=(14, 8)):
    '''
    Build a table of the mean and standard deviation, across splits, of every
    feature's importance.

    results     results dictionary; for each method, results[method][3] maps
                split -> fitted model and results[method][6] lists the
                feature names
    fontsize    kept for interface compatibility; only the (disabled)
    figsize     plotting code used them

    Returns a DataFrame with the columns 'Feature name',
    'Mean feature importance' and 'Std. Dev.'.  A handful of internal feature
    names are replaced by descriptive labels.

    Raises Exception when a feature name occurs more than once.

    NOTE(review): the table is returned for the first method in `results`
    (None if `results` is empty).  The indentation of the source this was
    reviewed from was lost, so confirm that the return belongs inside the
    loop over methods.

    The grouped (position dependent / independent, NGGX) statistics that were
    computed here fed only the plotting code, which is disabled; that
    computation did not affect the returned table and has been removed.
    '''
    feature_dictionary = {
        'pd_order2': 'position dep. order 2 ',
        'pd_order1': 'position dep. order 1 ',
        'pi_order1': 'position ind. order 1 ',
        'pi_order2': 'position ind. order 2 ',
        '5mer_end_False': 'Tm (5mer end)',
        '5mer_start_False': 'Tm (5mer start)',
        'Amino Acid Cut position': 'amino acid cut position ',
        '8mer_middle_False': 'Tm (8mer middle)',
        'NGGX_pd.Order2': 'NGGN interaction ',
        'Tm global_False': 'Tm (30mer)',
        'Percent Peptide': 'percent peptide ',
    }

    for method in list(results.keys()):
        feature_names = results[method][6]

        # Collect names seen more than once.  The previous check only ever
        # added to `seen` in the branch taken when a name was already in
        # `seen`, so it could never detect a duplicate.
        seen = set()
        duplicated = set()
        for ft in feature_names:
            if ft in seen:
                duplicated.add(ft)
            else:
                seen.add(ft)
        if duplicated:
            raise Exception("feature name appears more than once: %s" % duplicated)

        # One column of importances per split, features along the rows.
        split_importances = [
            extract_feature_from_model(method, results, split)
            for split in list(results[method][3].keys())
        ]
        all_split_importances = np.concatenate(split_importances, axis=1)
        avg_importance = np.mean(all_split_importances, axis=1)
        std_importance = np.std(all_split_importances, axis=1)

        # Build the frame from typed columns directly, instead of going
        # through a string array and DataFrame.convert_objects (which was
        # removed from pandas).
        df = pandas.DataFrame(
            {
                'Feature name': list(feature_names),
                'Mean feature importance': avg_importance,
                'Std. Dev.': std_importance,
            },
            columns=['Feature name', 'Mean feature importance', 'Std. Dev.'],
        )

        # Replace whole-column rather than assigning through .iloc on a
        # column selection (chained assignment).
        df['Feature name'] = df['Feature name'].map(
            lambda name: feature_dictionary.get(name, name))

        return df
|
| 862 |
+
|
| 863 |
+
def check_learn_options_set(learn_options_set):
    '''
    Return the "testing_non_binary_target_name" shared by every learn-options
    dictionary in `learn_options_set`.

    Returns 'ranks' when `learn_options_set` is None, and None when it is
    empty.  An AssertionError is raised when two entries disagree.
    '''
    if learn_options_set is None:
        return 'ranks'

    agreed_name = None
    for options in list(learn_options_set.values()):
        if agreed_name is not None:
            assert agreed_name == options["testing_non_binary_target_name"], "need to have same testing_non_binary_target_name across all learn options in a set for metrics to be comparable"
        else:
            agreed_name = options["testing_non_binary_target_name"]
    return agreed_name
|
| 875 |
+
|
| 876 |
+
def get_all_metrics(results, learn_options_set=None, test_metrics=['spearmanr'], add_extras=False, force_by_gene=False):
    """
    Compute the requested evaluation metrics, gene by gene, for every method in `results`.

    'metrics' here are the metrics used to evaluate

    Args:
        results: mapping of method name -> result record, where
            ``results[method][1][0]`` is a ``(truth, predictions)`` pair of
            per-gene mappings. ``truth[gene]`` is indexed by the non-binary
            target name and by ``'thrs'``; ``predictions[gene]`` holds the
            matching predicted scores.
        learn_options_set: optional mapping passed to
            ``check_learn_options_set`` to choose the non-binary target name.
        test_metrics: names of the metrics to compute. Recognised names are
            'spearmanr', 'spearmanr>2.5', 'RMSE', 'NDCG@5/10/20/50',
            'precision@5/10/20' and 'AUC'; any other name just yields an
            empty list. The list is only read, never mutated.
        add_extras: when True, also return ROC curve data.
        force_by_gene: unused; kept so existing callers keep working.

    Returns:
        ``(all_results, genes)`` where ``all_results[method][metric]`` is a
        list with one value per gene in `genes`. With ``add_extras`` the tuple
        is ``(all_results, genes, fpr_all, tpr_all, fpr_gene, tpr_gene)``; the
        ROC values describe the last method iterated.

    Raises:
        AssertionError: if a method's gene set differs from that of the first
            method.
    """
    all_results = {method: {metric: [] for metric in test_metrics} for method in results}
    # The gene order of the first method is used for every method.
    genes = list(results[list(results.keys())[0]][1][0][0].keys())

    non_binary_target_name = check_learn_options_set(learn_options_set)

    for method in results:
        truth, predictions = results[method][1][0]
        # The original check here could never fire (it compared a length with
        # itself and its body was a bare string). Enforce what it meant to say.
        assert set(truth.keys()) == set(genes), "genes have changed, need to modify code"

        # Re-initialised per method, so after the loop these describe the last method.
        fpr_gene = {}
        tpr_gene = {}
        y_truth_thresh_all = np.array([])
        y_pred_all = np.array([])

        for gene in genes:
            y_truth, y_pred = truth[gene], predictions[gene]
            target = y_truth[non_binary_target_name]

            # Pooled over genes for the global ROC curve returned with add_extras.
            y_truth_thresh_all = np.append(y_truth_thresh_all, y_truth['thrs'])
            y_pred_all = np.append(y_pred_all, y_pred)

            if 'spearmanr' in test_metrics:
                spearmanr = spearmanr_nonan(target, y_pred)[0]
                all_results[method]['spearmanr'].append(spearmanr)

            if 'spearmanr>2.5' in test_metrics:
                # NOTE(review): despite its name this stores an RMSE over the
                # samples whose target exceeds 1.0 (not 2.5); kept as-is.
                selected = target > 1.0
                #spearmanr = sp.stats.spearmanr(y_truth[non_binary_target_name][selected], y_pred[selected])[0]
                spearmanr = np.sqrt(np.mean((target[selected] - y_pred[selected])**2))
                all_results[method]['spearmanr>2.5'].append(spearmanr)

            if 'RMSE' in test_metrics:
                rmse = np.sqrt(np.mean((target - y_pred)**2))
                all_results[method]['RMSE'].append(rmse)

            for k in (5, 10, 20, 50):
                metric_name = 'NDCG@%d' % k
                if metric_name in test_metrics:
                    ndcg = ranking_metrics.ndcg_at_k_ties(target, y_pred, k)
                    all_results[method][metric_name].append(ndcg)

            for k in (5, 10, 20):
                metric_name = 'precision@%d' % k
                if metric_name in test_metrics:
                    # Binarise: 1 for every sample scoring at least as high as the k-th best.
                    y_top_truth = (target >= np.sort(target)[::-1][:k][-1]) * 1
                    y_top_pred = (y_pred >= np.sort(y_pred)[::-1][:k][-1]) * 1
                    # NOTE(review): sklearn's signature is precision_score(y_true, y_pred);
                    # the arguments are passed in the opposite order here. Left unchanged
                    # so reported numbers stay comparable - confirm the intent.
                    all_results[method][metric_name].append(sklearn.metrics.precision_score(y_top_pred, y_top_truth))

            if 'AUC' in test_metrics:
                fpr_gene[gene], tpr_gene[gene], _ = sklearn.metrics.roc_curve(y_truth['thrs'], y_pred)
                auc = sklearn.metrics.auc(fpr_gene[gene], tpr_gene[gene])
                all_results[method]['AUC'].append(auc)

    if add_extras:
        fpr_all, tpr_all, _ = sklearn.metrics.roc_curve(y_truth_thresh_all, y_pred_all)
        return all_results, genes, fpr_all, tpr_all, fpr_gene, tpr_gene
    else:
        return all_results, genes
|
| 965 |
+
|
| 966 |
+
def plot_all_metrics(metrics, gene_names, all_learn_options, save, plots=None, bottom=0.19):
    """Placeholder that accepts the plotting arguments and does nothing.

    Plotting is currently disabled: no argument is read and None is returned.
    """
    return None
|
| 968 |
+
# num_methods = len(list(metrics.keys()))
|
| 969 |
+
# metrics_names = list(metrics[list(metrics.keys())[0]].keys())
|
| 970 |
+
# num_genes = len(gene_names)
|
| 971 |
+
# width = 0.9/num_methods
|
| 972 |
+
# ind = np.arange(num_genes)
|
| 973 |
+
#
|
| 974 |
+
# if save==True:
|
| 975 |
+
# first_key = list(all_learn_options.keys())[0]
|
| 976 |
+
# #basefile = r"..\results\V%s_trmetric%s_%s" % (all_learn_options[first_key]["V"], all_learn_options[first_key]["training_metric"], datestamp())
|
| 977 |
+
# basefile = r"..\results\%s" % (first_key)
|
| 978 |
+
#
|
| 979 |
+
# d = os.path.dirname(basefile)
|
| 980 |
+
# if not os.path.exists(d):
|
| 981 |
+
# os.makedirs(d)
|
| 982 |
+
# with open(basefile + ".plot.pickle", "wb") as f:
|
| 983 |
+
# pickle.dump([metrics, all_learn_options, gene_names], f)
|
| 984 |
+
#
|
| 985 |
+
# for metric in metrics_names:
|
| 986 |
+
# if 'global' not in metric:
|
| 987 |
+
# plt.figure(metric, figsize=(20, 8))
|
| 988 |
+
# elif plots == None or 'gene level' in plots:
|
| 989 |
+
# plt.figure(metric, figsize=(12, 12))
|
| 990 |
+
#
|
| 991 |
+
# boxplot_labels = []
|
| 992 |
+
# boxplot_arrays = {}
|
| 993 |
+
# boxplot_median = {}
|
| 994 |
+
#
|
| 995 |
+
# for i, method in enumerate(metrics.keys()):
|
| 996 |
+
# boxplot_labels.append(method)
|
| 997 |
+
# for metric in list(metrics[method].keys()):
|
| 998 |
+
#
|
| 999 |
+
# if 'global' in metric:
|
| 1000 |
+
# plt.figure(metric)
|
| 1001 |
+
# plt.bar([i], metrics[method][metric], 0.9, color=plt.cm.Paired(1.*i/len(list(metrics.keys()))), label=method)
|
| 1002 |
+
# else:
|
| 1003 |
+
# if plots == None or 'gene level' in plots:
|
| 1004 |
+
# plt.figure(metric)
|
| 1005 |
+
# plt.bar(ind+(i*width), metrics[method][metric], width, color=plt.cm.Paired(1.*i/len(list(metrics.keys()))), label=method)
|
| 1006 |
+
#
|
| 1007 |
+
# median_metric = np.median(metrics[method][metric])
|
| 1008 |
+
# print(method, metric, median_metric)
|
| 1009 |
+
# assert not np.isnan(median_metric), "found nan for %s, %s" % (method, metric)
|
| 1010 |
+
# if metric not in list(boxplot_arrays.keys()):
|
| 1011 |
+
# boxplot_arrays[metric] = np.array(metrics[method][metric])[:, None]
|
| 1012 |
+
# boxplot_median[metric] = [np.median(np.array(metrics[method][metric]))]
|
| 1013 |
+
# else:
|
| 1014 |
+
# boxplot_arrays[metric] = np.concatenate((boxplot_arrays[metric], np.array(metrics[method][metric])[:, None]), axis=1)
|
| 1015 |
+
# boxplot_median[metric].append(np.median(np.array(metrics[method][metric])))
|
| 1016 |
+
#
|
| 1017 |
+
#
|
| 1018 |
+
# for metric in metrics_names:
|
| 1019 |
+
# if plots == None or 'gene level' in plots:
|
| 1020 |
+
# ax = plt.figure(metric)
|
| 1021 |
+
# leg = plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
|
| 1022 |
+
# # leg.draggable(state=True, use_blit=True)
|
| 1023 |
+
# plt.ylabel(metric)
|
| 1024 |
+
#
|
| 1025 |
+
# if 'global' in metric:
|
| 1026 |
+
# plt.xticks(list(range(len(list(metrics.keys())))), list(metrics.keys()), rotation=70)
|
| 1027 |
+
# plt.grid(True, which='both')
|
| 1028 |
+
# plt.subplots_adjust(left = 0.05, right = 0.8)
|
| 1029 |
+
# else:
|
| 1030 |
+
# plt.xticks(ind+width, gene_names)
|
| 1031 |
+
# plt.grid(True, which='both')
|
| 1032 |
+
# plt.subplots_adjust(left = 0.05, right = 0.8)
|
| 1033 |
+
# if save == True:
|
| 1034 |
+
# plt.xticks(ind+0.5, gene_names)
|
| 1035 |
+
# if metric=='AUC':
|
| 1036 |
+
# plt.ylim([0.5, 1.0])
|
| 1037 |
+
# plt.savefig(basefile + "_" + metric + "_bar" + ".png")
|
| 1038 |
+
#
|
| 1039 |
+
# if (plots == None or "boxplots" in plots) and 'global' not in metric:
|
| 1040 |
+
# plt.figure('Boxplot %s' % metric)
|
| 1041 |
+
#
|
| 1042 |
+
# sorted_boxplot = np.argsort(boxplot_median[metric])[::-1]
|
| 1043 |
+
#
|
| 1044 |
+
# plt.boxplot(boxplot_arrays[metric][:, sorted_boxplot])
|
| 1045 |
+
# plt.ylabel(metric)
|
| 1046 |
+
# plt.xticks(list(range(1, num_methods+1)), np.array(boxplot_labels)[sorted_boxplot], rotation=70)
|
| 1047 |
+
# plt.subplots_adjust(top = 0.97, bottom = bottom)
|
| 1048 |
+
#
|
| 1049 |
+
# if metric == 'RMSE':
|
| 1050 |
+
# plt.ylim((1.0, 2.0))
|
| 1051 |
+
#
|
| 1052 |
+
# if save == True:
|
| 1053 |
+
# plt.savefig(basefile + "_" + metric + ".png")
|
| 1054 |
+
|
| 1055 |
+
def load_results(directory, all_results, all_learn_options, model_filter=None, append_to_key=None):
    '''
    Merge the pickled results found in `directory` into the given dictionaries.

    Only load up files which contain one of the strings in model_filter in their names
    model_filter should be a list, or a string

    Files whose path contains 'learn_options' are always skipped. Each
    remaining pickle must hold ``(results, learn_options)``; a third element
    (gene names, written by the plotting routine) is tolerated and ignored.

    Args:
        directory: folder searched for ``*.pickle`` files.
        all_results: dict updated in place with every loaded result.
        all_learn_options: dict updated in place with the matching learn options.
        model_filter: None (load everything), a substring, or a list of substrings.
        append_to_key: optional suffix; keys are stored as ``key + "_" + suffix``.

    Returns:
        The same ``(all_results, all_learn_options)`` objects that were passed in.

    Raises:
        Exception: if the directory has no pickle files, or none passed the filter.
        AssertionError: if a key to be added already exists in `all_results`.
    '''
    num_added = 0
    # os.path.join instead of a hard-coded '\\' so the pattern also works off Windows.
    filelist = glob.glob(os.path.join(directory, '*.pickle'))
    if not filelist:
        raise Exception("found no pickle files in %s" % directory)
    print("found %d files in %s" % (len(filelist), directory))

    for results_file in filelist:
        if 'learn_options' in results_file:
            continue

        if model_filter is not None:
            if isinstance(model_filter, list):
                if not any(m in results_file for m in model_filter):
                    print("%s not in model_filter" % (results_file))#, model_filter)
                    continue
            elif model_filter not in results_file:
                continue

        # SECURITY: pickle.load executes arbitrary code; only point this at trusted files.
        with open(results_file, 'rb') as f:
            loaded = pickle.load(f)
        try:
            results, learn_options = loaded
        except ValueError:
            # this is when I accidentally saved from the plotting routine and should not generally be needed
            results, learn_options, _gene_names = loaded

        for k in results:
            k_new = k if append_to_key is None else k + "_" + append_to_key
            assert k_new not in all_results, "found %s already" % k
            print("adding key %s (from file %s)" % (k_new, os.path.split(results_file)[-1]))
            all_results[k_new] = results[k]
            all_learn_options[k_new] = learn_options[k]
            num_added = num_added + 1

    if num_added == 0:
        raise Exception("found no files to add from dir=%s" % directory)

    return all_results, all_learn_options
|
| 1107 |
+
|
| 1108 |
+
def plot_cluster_results(metrics=['spearmanr', 'NDCG@5'], plots=['boxplots'], directory=r'\\fusi1\crispr2\analysis\cluster\results', results=None, learn_options=None, filter=None):
    """Gather results (from disk or from `results`), compute metrics and plot them.

    Args:
        metrics: metric names forwarded to ``get_all_metrics`` as ``test_metrics``.
        plots: plot kinds forwarded to ``plot_all_metrics``.
        directory: a directory, or a list/tuple of directories, read with
            ``load_results`` when `results` is None.
        results: optional pre-loaded results mapping; when given, nothing is
            read from disk.
        learn_options: learn options keyed like `results`; may be None, in
            which case None is recorded for every key.
        filter: forwarded to ``load_results`` as its ``model_filter``.

    Returns:
        None.
    """
    all_results = {}
    all_learn_options = {}

    if results is None:
        # isinstance (rather than an exact type comparison) so tuples and list
        # subclasses are treated as several directories as well.
        directories = directory if isinstance(directory, (list, tuple)) else [directory]
        for exp_dir in directories:
            all_results, all_learn_options = load_results(exp_dir, all_results, all_learn_options, filter)
    else:
        for k in results:
            all_results[k] = results[k]
            # learn_options defaults to None; indexing it used to raise TypeError.
            all_learn_options[k] = learn_options[k] if learn_options is not None else None

    all_metrics, gene_names = get_all_metrics(all_results, test_metrics=metrics)
    plot_all_metrics(all_metrics, gene_names, all_learn_options, plots=plots, save=False)
|
| 1128 |
+
|
| 1129 |
+
|
| 1130 |
+
def ensemble_cluster_results(directory=r'\\fusi1\crispr2\analysis\cluster\results\cluster_experiment_izf_ob', ensemble_type='median', models_to_ensemble=['all']):
    """Combine the per-gene predictions of several pickled models into one ensemble.

    Every ``*.pickle`` file in `directory` (except those whose path contains
    'learn_options') is loaded as ``(results, learn_options)``. For each gene
    the predictions of the selected models are stacked column-wise and reduced
    with the chosen ensemble method. The ensemble is stored in the returned
    results under the key `ensemble_type`.

    Args:
        directory: folder holding the pickled results.
        ensemble_type: 'majority', 'median' or 'stacking'.
        models_to_ensemble: substrings; a model takes part when its key
            contains at least one of them.
            NOTE(review): the default ['all'] therefore only selects models
            whose key contains the text "all" - confirm whether it was meant
            as a wildcard.

    Returns:
        ``(all_results, all_learn_options)`` including the new ensemble entry
        (its learn options are None).

    Raises:
        ValueError: for an unknown `ensemble_type`, or when no model matches
            `models_to_ensemble`.
        AssertionError: on duplicate keys across files, or when models
            disagree on the ordering of the ground truth.
    """
    # Fail early: an unknown type used to surface as an unbound `y_pred` later on.
    if ensemble_type not in ('majority', 'median', 'stacking'):
        raise ValueError("unknown ensemble_type %r" % (ensemble_type,))

    all_results = {}
    all_learn_options = {}

    # os.path.join instead of a hard-coded '\\' so the pattern also works off Windows.
    for results_file in glob.glob(os.path.join(directory, '*.pickle')):
        if 'learn_options' in results_file:
            continue

        # SECURITY: pickle.load executes arbitrary code; only point this at trusted files.
        with open(results_file, 'rb') as f:
            results, learn_options = pickle.load(f)

        for k in results:
            assert k not in all_results
            all_results[k] = results[k]
            all_learn_options[k] = learn_options[k]

    genes = list(all_results[list(all_results.keys())[0]][1][0][0].keys())
    models = list(all_results.keys())

    ens_predictions = {}
    ens_truths = {}
    for gene in genes:
        test_predictions = None
        cv_predictions = None
        cv_truth = None

        prev_model_truth = None
        for model in models:
            if not any(m in model for m in models_to_ensemble):
                continue

            truth, predictions = all_results[model][1][0]

            # `is None`, not `== None`: once this holds an ndarray, `==` compares
            # element-wise and the `if` raises "truth value ... is ambiguous".
            if test_predictions is None:
                test_predictions = predictions[gene][:, None]
            else:
                test_predictions = np.append(test_predictions, predictions[gene][:, None], axis=1)

            # this is just to check that all the models are using the same ordering of
            # the ground truth and hence of the samples, as this might mess up the ensemble.
            if prev_model_truth is not None:
                assert np.all(truth[gene]['ranks'] == prev_model_truth)
            else:
                prev_model_truth = truth[gene]['ranks']

            # take all the other genes and stack the predictions under a given model.
            cv_predictions_gene_j = np.array([])
            cv_truth_gene_j = np.array([])
            for other_gene in genes:
                if gene == other_gene:
                    continue
                cv_predictions_gene_j = np.append(cv_predictions_gene_j, predictions[other_gene])
                cv_truth_gene_j = np.append(cv_truth_gene_j, truth[other_gene]['ranks'])

            # The held-out truth is taken from the first selected model only.
            if cv_truth is None:
                cv_truth = cv_truth_gene_j.copy()[:, None]

            if cv_predictions is None:
                cv_predictions = cv_predictions_gene_j[:, None]
            else:
                cv_predictions = np.append(cv_predictions, cv_predictions_gene_j[:, None], axis=1)

        if test_predictions is None:
            # Nothing was selected; without this, `truth` below would be unbound.
            raise ValueError("no models matched models_to_ensemble=%r" % (models_to_ensemble,))

        if ensemble_type == 'majority':
            y_pred = ensembles.pairwise_majority_voting(test_predictions)
        elif ensemble_type == 'median':
            y_pred = ensembles.median(test_predictions)
        else:  # 'stacking' (validated above)
            y_pred = ensembles.linear_stacking(cv_truth, cv_predictions, test_predictions)

        ens_predictions[gene] = y_pred
        # `truth` is the ground truth of the last selected model.
        ens_truths[gene] = truth[gene]

    all_results[ensemble_type] = [None, [[ens_truths, ens_predictions]], None, None]
    all_learn_options[ensemble_type] = None
    # spearmans = []
    # for gene in ens_predictions.keys():
    #     spearmans.append(sp.stats.spearmanr(ens_predictions[gene], ens_truths[gene]['raw'])[0])
    #     print gene, spearmans[-1]
    # print "median: %.5f" % np.median(spearmans)

    return all_results, all_learn_options
|
| 1213 |
+
|
| 1214 |
+
# def plot_old_vs_new_feat(results, models, fontsize=20, filename=None, print_output=False):
|
| 1215 |
+
#
|
| 1216 |
+
# model_names = []
|
| 1217 |
+
# for model in models:
|
| 1218 |
+
# if 'doench' in model:
|
| 1219 |
+
# model_names.append('SVM + LogReg')
|
| 1220 |
+
# elif 'AB_' in model:
|
| 1221 |
+
# model_names.append('AdaBoost DT')
|
| 1222 |
+
# else:
|
| 1223 |
+
# model_names.append(model)
|
| 1224 |
+
#
|
| 1225 |
+
# base_spearman_means = []
|
| 1226 |
+
# base_AUC_means = []
|
| 1227 |
+
# feat_spearman_means = []
|
| 1228 |
+
# feat_AUC_means = []
|
| 1229 |
+
# base_spearman_std = []
|
| 1230 |
+
# feat_spearman_std = []
|
| 1231 |
+
# base_AUC_se = []
|
| 1232 |
+
# feat_AUC_se = []
|
| 1233 |
+
#
|
| 1234 |
+
# for model in models:
|
| 1235 |
+
# metrics = get_all_metrics({model: results[model]}, test_metrics=['spearmanr', 'AUC'])[0][model]
|
| 1236 |
+
# metrics_feat = get_all_metrics({model + '_feat': results[model + "_feat"]}, test_metrics=['spearmanr', 'AUC'])[0][model + '_feat']
|
| 1237 |
+
#
|
| 1238 |
+
# base_spearman_means.append(np.mean(metrics['spearmanr']))
|
| 1239 |
+
# base_spearman_std.append(np.std(metrics['spearmanr']))
|
| 1240 |
+
# base_AUC_means.append(np.mean(metrics['AUC']))
|
| 1241 |
+
# base_AUC_se.append(np.std(metrics['AUC']))
|
| 1242 |
+
#
|
| 1243 |
+
# feat_spearman_means.append(np.mean(metrics_feat['spearmanr']))
|
| 1244 |
+
# feat_spearman_std.append(np.std(metrics_feat['spearmanr']))
|
| 1245 |
+
# feat_AUC_means.append(np.mean(metrics_feat['AUC']))
|
| 1246 |
+
# feat_AUC_se.append(np.std(metrics_feat['AUC']))
|
| 1247 |
+
#
|
| 1248 |
+
#
|
| 1249 |
+
# print("old features")
|
| 1250 |
+
# print("mean: " + str(base_spearman_means))
|
| 1251 |
+
# print("std: " + str(base_spearman_std))
|
| 1252 |
+
#
|
| 1253 |
+
# print("old + new features")
|
| 1254 |
+
# print("mean: " + str(feat_spearman_means))
|
| 1255 |
+
# print("std: " + str(feat_spearman_std))
|
| 1256 |
+
#
|
| 1257 |
+
# plt.figure()
|
| 1258 |
+
# ind = np.arange(len(models))
|
| 1259 |
+
# width = 0.4
|
| 1260 |
+
# plt.bar(ind, base_spearman_means, width, color='#D14B5D', yerr=base_spearman_std, ecolor='k', edgecolor='none', label='Old features')
|
| 1261 |
+
# plt.bar(ind+width, feat_spearman_means, width, color='#852230', yerr=feat_spearman_std, ecolor='k', edgecolor='none', label='Old + new features')
|
| 1262 |
+
# ax = plt.gca()
|
| 1263 |
+
# ax.set_ylabel('Spearman r', fontsize=fontsize)
|
| 1264 |
+
# ax.set_xticks(ind+width)
|
| 1265 |
+
# ax.set_xticklabels(model_names, fontsize=fontsize)
|
| 1266 |
+
# plt.legend(loc=0, fontsize=fontsize)
|
| 1267 |
+
# plt.yticks(fontsize=fontsize)
|
| 1268 |
+
# plt.ylim((0.0, 0.7))
|
| 1269 |
+
# remove_top_right_on_plot()
|
| 1270 |
+
# if filename is not None:
|
| 1271 |
+
# plt.savefig(filename + '_spearman.pdf')
|
| 1272 |
+
#
|
| 1273 |
+
# plt.figure()
|
| 1274 |
+
# ind = np.arange(len(models))
|
| 1275 |
+
# width = 0.4
|
| 1276 |
+
# plt.bar(ind, base_AUC_means, width, color='#D14B5D', yerr=base_AUC_se, ecolor='k', edgecolor='none', label='Old features')
|
| 1277 |
+
# plt.bar(ind+width, feat_AUC_means, width, color='#852230', yerr=feat_AUC_se, ecolor='k', edgecolor='none', label='Old + new features')
|
| 1278 |
+
# ax = plt.gca()
|
| 1279 |
+
# ax.set_ylabel('AUC', fontsize=fontsize)
|
| 1280 |
+
# ax.set_xticks(ind+width)
|
| 1281 |
+
# ax.set_xticklabels(model_names, fontsize=fontsize)
|
| 1282 |
+
# plt.legend(loc=0)
|
| 1283 |
+
# plt.ylim((0.5, 0.85))
|
| 1284 |
+
# plt.legend(loc=0, fontsize=fontsize)
|
| 1285 |
+
# plt.yticks(fontsize=fontsize)
|
| 1286 |
+
# remove_top_right_on_plot()
|
| 1287 |
+
# if filename is not None:
|
| 1288 |
+
# plt.savefig(filename + '_AUC.pdf')
|
| 1289 |
+
#
|
| 1290 |
+
# # plt.subplots_adjust(top = 0.97, bottom = 0.4)
|
| 1291 |
+
|
| 1292 |
+
|
| 1293 |
+
# def remove_top_right_on_plot(ax=None):
|
| 1294 |
+
# if ax==None:
|
| 1295 |
+
# # ax = plt.gca()
|
| 1296 |
+
# ax.xaxis.set_ticks_position('bottom')
|
| 1297 |
+
# ax.yaxis.set_ticks_position('left')
|
| 1298 |
+
# ax.spines['right'].set_visible(False)
|
| 1299 |
+
# ax.spines['top'].set_visible(False)
|
| 1300 |
+
|
| 1301 |
+
|
| 1302 |
+
if __name__ == '__main__':
    # Script entry point: builds the 31-mer data, then regenerates the pickled
    # feature (X) and target (Y) tables for data version V.
    # The leftover `import ipdb; ipdb.set_trace()` breakpoint was removed: it
    # halted every run and made ipdb a hard dependency of the script.
    get_thirty_one_mer_data()

    # v3_v3_a_feat = 'tests/ens/'
    # v3_v3_d_feat = 'tests/ens2/'
    # # v3_v3_a_feat = r'\\fusi1\crispr2\analysis\cluster\results\cluster_experiment_flmrsw'
    # all_results, all_learn_options = {}, {}
    # all_results, all_learn_options = util.load_results(v3_v3_a_feat, all_results, all_learn_options, model_filter=None, append_to_key='feat')
    # results = dict([('AB', all_results['AB_or2_md3_lr0.10_n100_V3_on_V3_feat'])])
    # df = feature_importances(results)
    # all_results, all_learn_options = ensemble_cluster_results(directory=[v3_v3_a_feat], ensemble_type='SVM')
    # plot_cluster_results(results=all_results, learn_options=all_learn_options, metrics=['AUC', 'spearmanr'])
    # plot_cluster_results(directory=r'\\fusi1\crispr2\analysis\cluster\results')
    # all_results = ensemble_cluster_results(ensemble_type='stacking', models_to_ensemble=['L1', 'L2'])
    # all_metrics, gene_names = get_all_metrics(all_results)
    # plot_all_metrics(all_metrics, gene_names, None, save=False)
    #V = "0"
    V = "1"
    if V == "1":
        # `sheet_name` replaces the `sheetname` keyword, which current pandas rejects.
        human_data = pandas.read_excel("data/V1_data.xlsx", sheet_name=0, index_col=[0, 1])
        mouse_data = pandas.read_excel("data/V1_data.xlsx", sheet_name=1, index_col=[0, 1])
        X, Y = combine_organisms()
        X.to_pickle('../data/X.pd') #sequence features (i.e. inputs to prediction)
        Y.to_pickle('../data/Y.pd') #cell-averaged ranks, plus more (i.e. possible targets for prediction)
        print("done writing to file")
    elif V == "2":
        # this is now all in predict.py
        pass
    elif V == "0":
        pass
|
|
@@ -32,95 +32,3 @@ def show_error(settings, message, e):
|
|
| 32 |
print(f"Error showing error message: {e}")
|
| 33 |
|
| 34 |
exit(-1)
|
| 35 |
-
|
| 36 |
-
def scale_ui(window, base_width=1920, base_height=1080, font_size=12, header_font_size=30, custom_scale_width=None, custom_scale_height=None):
    """Apply the Arial font style to `window` and resize it relative to the primary screen.

    The target size is the custom (or default 1150x650) size scaled by the
    ratio of the screen size to the base resolution, but never smaller than
    the size the window has after ``adjustSize()``. If the window has a
    ``title`` attribute, its font is scaled with the screen width.

    Any exception is caught and printed; nothing is returned.
    """
    try:
        # Dimensions of the primary screen
        geometry = QtGui.QGuiApplication.primaryScreen().geometry()
        screen_w = geometry.width()
        screen_h = geometry.height()

        # Base font for the whole window
        window.centralWidget().setStyleSheet(f"font: {font_size}pt 'Arial';")

        if hasattr(window, 'title'):
            title_pt = int(header_font_size * (screen_w / base_width))
            window.title.setStyleSheet(f"font: bold {title_pt}pt 'Arial';")

        # Requested size, scaled from the base resolution to this screen
        target_w = custom_scale_width if custom_scale_width else 1150
        target_h = custom_scale_height if custom_scale_height else 650
        scaled_w = int((screen_w * target_w) / base_width)
        scaled_h = int((screen_h * target_h) / base_height)

        # Let the layout settle, then treat the resulting size as the minimum
        window.adjustSize()
        min_w = window.size().width()
        min_h = window.size().height()

        # One resize call with both dimensions clamped from below
        window.resize(max(scaled_w, min_w), max(scaled_h, min_h))

    except Exception as e:
        print(f"Error in scale_ui: {e}")
|
| 70 |
-
|
| 71 |
-
def center_ui(window):
    """Move `window` so that it is centred on the primary screen, keeping its size.

    Any exception is caught and printed; nothing is returned.
    """
    try:
        window.repaint()
        QtWidgets.QApplication.processEvents()

        # Current window size
        win_w = window.width()
        win_h = window.height()

        # Centre point of the primary screen
        screen_center = QtGui.QGuiApplication.primaryScreen().geometry().center()

        # Top-left corner that puts the window's middle on that point
        left = screen_center.x() - (win_w // 2)
        top = screen_center.y() - (win_h // 2)

        window.setGeometry(left, top, win_w, win_h)
        window.repaint()
    except Exception as e:
        print(f"Error centering window: {e}")
|
| 96 |
-
|
| 97 |
-
def position_window(new_window, parent_window=None):
    """Show ``new_window.view``, place it relative to a parent window and bring it to the front.

    Args:
        new_window: object exposing the widget to show as ``.view``.
        parent_window: window used as the position reference; defaults to the
            application's active window.

    Placement: the parent's ``last_position`` when it has a truthy one,
    otherwise the parent's geometry offset by (50, 50); with no parent at all
    the view is centred on the primary screen.
    """
    view = new_window.view

    # Check if the window is already visible and active
    if view.isVisible() and view.isActiveWindow():
        # If the window is already visible and active, just ensure it's in the foreground
        view.raise_()
        view.activateWindow()
        QtWidgets.QApplication.setActiveWindow(view)
        return

    if parent_window is None:
        parent_window = QtWidgets.QApplication.activeWindow()

    if parent_window:
        if hasattr(parent_window, 'last_position') and parent_window.last_position:
            view.move(parent_window.last_position)
        else:
            parent_geo = parent_window.geometry()
            view.move(parent_geo.x() + 50, parent_geo.y() + 50)
    else:
        # Centre the widget itself. Passing `new_window` (the wrapper) made
        # center_ui fail inside its own try/except, so nothing was centred.
        center_ui(view)

    view.show()
    view.raise_()
    view.activateWindow()
    QtWidgets.QApplication.setActiveWindow(view)

    # Force the window to be active and in the foreground
    view.setWindowState(view.windowState() & ~QtCore.Qt.WindowState.WindowMinimized | QtCore.Qt.WindowState.WindowActive)
    view.raise_()
    view.activateWindow()
|
|
|
|
| 32 |
print(f"Error showing error message: {e}")
|
| 33 |
|
| 34 |
exit(-1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -1,429 +0,0 @@
|
|
| 1 |
-
###############################################################################
|
| 2 |
-
# INPUTS: inputs are the annotation files to parse. Currently, only gbff is supported.
|
| 3 |
-
# OUTPUTS: the outputs are data structures that store the parsed data
|
| 4 |
-
################################################################################
|
| 5 |
-
|
| 6 |
-
from PyQt5 import QtWidgets
|
| 7 |
-
import gffutils
|
| 8 |
-
import models.GlobalSettings as GlobalSettings
|
| 9 |
-
import os
|
| 10 |
-
from Bio import SeqIO
|
| 11 |
-
import traceback
|
| 12 |
-
|
| 13 |
-
logger = GlobalSettings.logger
|
| 14 |
-
|
| 15 |
-
class Annotation_Parser:
|
| 16 |
-
def __init__(self):
|
| 17 |
-
try:
|
| 18 |
-
#variables to use
|
| 19 |
-
self.annotationFileName = "" #this is the variable that holds the filename itself
|
| 20 |
-
self.txtLocusTag = False
|
| 21 |
-
self.isGff = False
|
| 22 |
-
self.isTxt = False
|
| 23 |
-
self.max_chrom = 0
|
| 24 |
-
|
| 25 |
-
#dictionary used for finding the genes in a txt annotation file
|
| 26 |
-
#key: locus_tag
|
| 27 |
-
#value: List of lists
|
| 28 |
-
# essentially its all based on locus tag. So the key is the locus tag, and its data is:
|
| 29 |
-
# [genomic accession, int, start, end, +\-]
|
| 30 |
-
self.reg_dict = dict()
|
| 31 |
-
|
| 32 |
-
#parallel dictionary used for the txt annotaion file
|
| 33 |
-
#key: name + symbol (space in between each word)
|
| 34 |
-
#value: locus_tag (indexes dict)
|
| 35 |
-
self.para_dict = dict()
|
| 36 |
-
|
| 37 |
-
#list of tuples containing (chromosome/scaffold # {int}, Feature matching search criteria {SeqFeature Object})
|
| 38 |
-
self.results_list = list()
|
| 39 |
-
|
| 40 |
-
except Exception as e:
|
| 41 |
-
logger.critical("Error initializing Annotation_Parser class.")
|
| 42 |
-
logger.critical(e)
|
| 43 |
-
logger.critical(traceback.format_exc())
|
| 44 |
-
msgBox = QtWidgets.QMessageBox()
|
| 45 |
-
msgBox.setStyleSheet("font: " + str(GlobalSettings.mainWindow.fontSize) + "pt 'Arial'")
|
| 46 |
-
msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
|
| 47 |
-
msgBox.setWindowTitle("Fatal Error")
|
| 48 |
-
msgBox.setText("Fatal Error:\n"+str(e)+ "\n\nFor more information on this error, look at CASPER.log in the application folder.")
|
| 49 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
|
| 50 |
-
msgBox.exec()
|
| 51 |
-
|
| 52 |
-
exit(-1)
|
| 53 |
-
|
| 54 |
-
### This function takes a list of lists and flattens it into a single list. Useful when dealing with a list of lists where the nested lists only have 1 entry.
|
| 55 |
-
def flatten_list(self,t):
    """Return the items of every sub-list of `t`, lower-cased, as one flat list."""
    flattened = []
    for sublist in t:
        flattened.extend(item.lower() for item in sublist)
    return flattened
|
| 57 |
-
|
| 58 |
-
### This function finds how many chromosomes are within the selected annotation file and returns the value
|
| 59 |
-
def get_max_chrom(self):
    """Count the records in the selected GenBank annotation file.

    Returns:
        int: number of records parsed from ``self.annotationFileName``;
        0 when the file contains none (this used to raise UnboundLocalError).
    """
    parser = SeqIO.parse(self.annotationFileName, 'genbank') # Initialize parser (iterator) for each query
    return sum(1 for _ in parser)
|
| 64 |
-
|
| 65 |
-
def get_sequence_info(self, query):
    """Locate the first occurrence of `query` in the annotation file's sequences.

    Each record is searched on its forward strand first, then on its reverse
    complement. The previous search results are cleared first.

    Args:
        query: nucleotide string to look for (matched case-sensitively).

    Returns:
        ``(record_number, start, stop)`` with a 1-based record number and
        1-based inclusive positions on the record's forward strand, or False
        when no record contains the query.

    On any exception the error is logged, a fatal-error dialog is shown and
    the process exits with status -1.
    """
    try:
        self.results_list.clear()
        query_len = len(query)
        parser = SeqIO.parse(self.annotationFileName, 'genbank') # Initialize parser (iterator) for each query
        for j, record in enumerate(parser): # Each record corresponds to a chromosome/scaffold in the FNA/FASTA file
            pos = str(record.seq).find(query)
            if pos != -1: # If match is found
                return (j+1, pos+1, pos+query_len) # Chromosome number, start index, stop index

            pos = str(record.seq.reverse_complement()).find(query) # Check the reverse complement now
            if pos != -1: # If match is found
                # Map the hit back onto the forward strand. The old
                # (pos-len, pos-1) matched neither strand's coordinates and
                # could go negative.
                # NOTE(review): assumes callers expect forward-strand
                # coordinates, as in the branch above - confirm.
                seq_len = len(record.seq)
                return (j+1, seq_len-pos-query_len+1, seq_len-pos) # Chromosome number, start index, stop index
        return False

    except Exception as e:
        logger.critical("Error in get_sequence_info() in annotation parser.")
        logger.critical(e)
        logger.critical(traceback.format_exc())
        msgBox = QtWidgets.QMessageBox()
        msgBox.setStyleSheet("font: " + str(GlobalSettings.mainWindow.fontSize) + "pt 'Arial'")
        msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
        msgBox.setWindowTitle("Fatal Error")
        msgBox.setText("Fatal Error:\n"+str(e)+ "\n\nFor more information on this error, look at CASPER.log in the application folder.")
        msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
        msgBox.exec()

        exit(-1)
|
| 94 |
-
|
| 95 |
-
### The workhorse function of AnnotationParser, this searches the annotation file for the user's search and returns features matching the description.
|
| 96 |
-
def genbank_search(self, queries, same_search):
|
| 97 |
-
index_number = 0
|
| 98 |
-
try:
|
| 99 |
-
if same_search: # If searching for the same thing, just return the results from last time
|
| 100 |
-
return self.results_list
|
| 101 |
-
else:
|
| 102 |
-
self.results_list.clear()
|
| 103 |
-
for i, query in enumerate(queries):
|
| 104 |
-
parser = SeqIO.parse(self.annotationFileName, 'genbank') # Initialize parser (iterator) for each query
|
| 105 |
-
for j,record in enumerate(parser): # Each record corresponds to a chromosome/scaffold in the FNA/FASTA file
|
| 106 |
-
if i == 0:
|
| 107 |
-
index_number += 1
|
| 108 |
-
for feature in record.features: # Each feature corresponds to a gene, tRNA, rep_origin, etc. in the given record (chromosome/scaffold)
|
| 109 |
-
if "translation" in feature.qualifiers:
|
| 110 |
-
if query.lower() in " ".join(self.flatten_list(feature.qualifiers.values())[:-1]) and feature.type != "source" and feature.type != "gene": # If search matches the feature's qualifiers somewhere, save it
|
| 111 |
-
self.results_list.append((j+1,feature))
|
| 112 |
-
else: # If search not in the feature's qualifiers, move to the next feature
|
| 113 |
-
continue
|
| 114 |
-
else:
|
| 115 |
-
if query.lower() in " ".join(self.flatten_list(feature.qualifiers.values())) and feature.type != "source" and feature.type != "gene": # If search matches the feature's qualifiers somewhere, save it
|
| 116 |
-
self.results_list.append((j+1,feature))
|
| 117 |
-
else: # If search not in the feature's qualifiers, move to the next feature
|
| 118 |
-
continue
|
| 119 |
-
self.max_chrom = index_number # Counts the number of chromosomes/scaffolds in the organism (only do this once, even if there are multiple queries)
|
| 120 |
-
else:
|
| 121 |
-
for feature in record.features:
|
| 122 |
-
if "translation" in feature.qualifiers:
|
| 123 |
-
if query.lower() in " ".join(self.flatten_list(feature.qualifiers.values())[:-1]) and feature.type != "source" and feature.type != "gene": # If search matches the feature's qualifiers somewhere, save it
|
| 124 |
-
self.results_list.append((j+1,feature))
|
| 125 |
-
else: # If search not in the feature's qualifiers, move to the next feature
|
| 126 |
-
continue
|
| 127 |
-
else:
|
| 128 |
-
if query.lower() in " ".join(self.flatten_list(feature.qualifiers.values())) and feature.type != "source" and feature.type != "gene": # If search matches the feature's qualifiers somewhere, save it
|
| 129 |
-
self.results_list.append((j+1,feature))
|
| 130 |
-
else: # If search not in the feature's qualifiers, move to the next feature
|
| 131 |
-
continue
|
| 132 |
-
return self.results_list
|
| 133 |
-
|
| 134 |
-
except Exception as e:
|
| 135 |
-
logger.critical("Error in genbank_search() in annotation parser.")
|
| 136 |
-
logger.critical(e)
|
| 137 |
-
logger.critical(traceback.format_exc())
|
| 138 |
-
msgBox = QtWidgets.QMessageBox()
|
| 139 |
-
msgBox.setStyleSheet("font: " + str(GlobalSettings.mainWindow.fontSize) + "pt 'Arial'")
|
| 140 |
-
msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
|
| 141 |
-
msgBox.setWindowTitle("Fatal Error")
|
| 142 |
-
msgBox.setText("Fatal Error:\n"+str(e)+ "\n\nFor more information on this error, look at CASPER.log in the application folder.")
|
| 143 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
|
| 144 |
-
msgBox.exec()
|
| 145 |
-
|
| 146 |
-
exit(-1)
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
# This function parses gff files and stores them in a dictionary
|
| 152 |
-
# It also creates a parallel dictionary to use in searching
|
| 153 |
-
# Precondition: ONLY TO BE USED WITH GFF FILES
|
| 154 |
-
def gff_parse(self):
|
| 155 |
-
try:
|
| 156 |
-
self.reg_dict.clear()
|
| 157 |
-
self.para_dict.clear()
|
| 158 |
-
prevFirstIndex = ""
|
| 159 |
-
indexNumber = 1
|
| 160 |
-
fileStream = open(self.annotationFileName)
|
| 161 |
-
data_base_file_name = GlobalSettings.CSPR_DB + "/" + "gff_database.db"
|
| 162 |
-
|
| 163 |
-
# temp list will be the following each time it is put into the dictionary:
|
| 164 |
-
# [Sequence ID (genomic accession or scaffold), the index number itself, the feature type (cds, gene, mrna), the start(-1), end, and the strand]
|
| 165 |
-
tempList = list()
|
| 166 |
-
currentLocusTag = ""
|
| 167 |
-
para_dict_key_string = ""
|
| 168 |
-
|
| 169 |
-
# initialize the data base (this is what parses it for me)
|
| 170 |
-
print("Intializing the data base")
|
| 171 |
-
db = gffutils.create_db(self.annotationFileName, dbfn=data_base_file_name, force=True, keep_order=True,
|
| 172 |
-
merge_strategy='merge', sort_attribute_values=True)
|
| 173 |
-
print("Finished intializing")
|
| 174 |
-
|
| 175 |
-
# call the feature version of that data base now
|
| 176 |
-
db = gffutils.FeatureDB(data_base_file_name, keep_order=True)
|
| 177 |
-
|
| 178 |
-
# now we go through that data base and get the data we want
|
| 179 |
-
for feature in db.all_features(limit=None, strand=None, featuretype=None, order_by=None, reverse=False,
|
| 180 |
-
completely_within=False):
|
| 181 |
-
# if the genomic accession/scaffold/chromseome changes, update the indexNumber
|
| 182 |
-
if prevFirstIndex != feature.seqid and prevFirstIndex != "":
|
| 183 |
-
indexNumber += 1
|
| 184 |
-
# if we find a new gene, update the locus_tag/name
|
| 185 |
-
if feature.featuretype == "gene" or feature.featuretype == 'pseudogene':
|
| 186 |
-
|
| 187 |
-
# check and see if locus tag is in the attributes, go on the Name if locus_tag is not in there
|
| 188 |
-
if 'locus_tag' in feature.attributes:
|
| 189 |
-
currentLocusTag = feature.attributes['locus_tag'][0]
|
| 190 |
-
else:
|
| 191 |
-
currentLocusTag = feature.attributes["Name"][0]
|
| 192 |
-
|
| 193 |
-
# once the locus tag changes, append it to the para_dict
|
| 194 |
-
if para_dict_key_string != "":
|
| 195 |
-
if para_dict_key_string not in self.para_dict:
|
| 196 |
-
self.para_dict[para_dict_key_string] = list()
|
| 197 |
-
self.para_dict[para_dict_key_string].append(currentLocusTag)
|
| 198 |
-
else:
|
| 199 |
-
if currentLocusTag not in self.para_dict[para_dict_key_string]:
|
| 200 |
-
self.para_dict[para_dict_key_string].append(currentLocusTag)
|
| 201 |
-
para_dict_key_string = ""
|
| 202 |
-
|
| 203 |
-
tempList = [currentLocusTag, indexNumber, feature.featuretype, feature.start - 1, feature.end,
|
| 204 |
-
feature.strand]
|
| 205 |
-
|
| 206 |
-
# insert that locus tag/name into the dictionary
|
| 207 |
-
if currentLocusTag not in self.reg_dict:
|
| 208 |
-
self.reg_dict[currentLocusTag] = []
|
| 209 |
-
self.reg_dict[currentLocusTag].append(tempList)
|
| 210 |
-
elif currentLocusTag in self.reg_dict:
|
| 211 |
-
self.reg_dict[currentLocusTag].append(tempList)
|
| 212 |
-
|
| 213 |
-
# go through each of this child's children
|
| 214 |
-
for child in db.children(feature.id, level=None, featuretype=None, order_by=None, reverse=False,
|
| 215 |
-
limit=None, completely_within=False):
|
| 216 |
-
tempList = [currentLocusTag, indexNumber, child.featuretype, child.start - 1, child.end, child.strand]
|
| 217 |
-
|
| 218 |
-
# only insert it if it hasn't been inserted before
|
| 219 |
-
if tempList not in self.reg_dict[currentLocusTag]:
|
| 220 |
-
self.reg_dict[currentLocusTag].append(tempList)
|
| 221 |
-
|
| 222 |
-
# now go through the other ones which are not region
|
| 223 |
-
elif feature.featuretype != "region" and feature.featuretype != "telomere" and feature.featuretype != "origin_of_replication":
|
| 224 |
-
tempList = [currentLocusTag, indexNumber, feature.featuretype, feature.start - 1, feature.end,
|
| 225 |
-
feature.strand]
|
| 226 |
-
|
| 227 |
-
# only insert if it hasn't been inserted before
|
| 228 |
-
if tempList not in self.reg_dict[currentLocusTag]:
|
| 229 |
-
self.reg_dict[currentLocusTag].append(tempList)
|
| 230 |
-
|
| 231 |
-
# now same as above, go through the children again
|
| 232 |
-
for child in db.children(feature.id, level=None, featuretype=None, order_by=None, reverse=False,
|
| 233 |
-
limit=None, completely_within=False):
|
| 234 |
-
tempList = [currentLocusTag, indexNumber, child.featuretype, child.start - 1, child.end,
|
| 235 |
-
child.strand]
|
| 236 |
-
|
| 237 |
-
if tempList not in self.reg_dict[currentLocusTag]:
|
| 238 |
-
self.reg_dict[currentLocusTag].append(tempList)
|
| 239 |
-
|
| 240 |
-
# now we need to get the para_dict up and running
|
| 241 |
-
# get the stuff out of the product part
|
| 242 |
-
if 'product' in feature.attributes and feature.featuretype == "CDS":
|
| 243 |
-
if para_dict_key_string == "":
|
| 244 |
-
para_dict_key_string = feature.attributes['product'][0]
|
| 245 |
-
else:
|
| 246 |
-
para_dict_key_string = para_dict_key_string + ";" + feature.attributes['product'][0]
|
| 247 |
-
# get the stuff out of the Note part
|
| 248 |
-
if 'Note' in feature.attributes:
|
| 249 |
-
if para_dict_key_string == "":
|
| 250 |
-
para_dict_key_string = feature.attributes['Note'][0]
|
| 251 |
-
else:
|
| 252 |
-
para_dict_key_string = para_dict_key_string + ";" + feature.attributes['Note'][0]
|
| 253 |
-
|
| 254 |
-
prevFirstIndex = feature.seqid
|
| 255 |
-
self.max_chrom = indexNumber
|
| 256 |
-
except Exception as e:
|
| 257 |
-
logger.critical("Error in gff_parse() in annotation parser.")
|
| 258 |
-
logger.critical(e)
|
| 259 |
-
logger.critical(traceback.format_exc())
|
| 260 |
-
msgBox = QtWidgets.QMessageBox()
|
| 261 |
-
msgBox.setStyleSheet("font: " + str(GlobalSettings.mainWindow.fontSize) + "pt 'Arial'")
|
| 262 |
-
msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
|
| 263 |
-
msgBox.setWindowTitle("Fatal Error")
|
| 264 |
-
msgBox.setText("Fatal Error:\n"+str(e)+ "\n\nFor more information on this error, look at CASPER.log in the application folder.")
|
| 265 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
|
| 266 |
-
msgBox.exec()
|
| 267 |
-
|
| 268 |
-
exit(-1)
|
| 269 |
-
|
| 270 |
-
# This function parses txt files and stores them in a dictionary
|
| 271 |
-
# It also creates a parallel dictionary to use in searching
|
| 272 |
-
# Precondition: ONLY TO BE USED WITH TXT FILES
|
| 273 |
-
def txt_parse(self):
|
| 274 |
-
try:
|
| 275 |
-
self.reg_dict.clear()
|
| 276 |
-
prevGenAccession = ""
|
| 277 |
-
indexNumber = 1
|
| 278 |
-
fileStream = open(self.annotationFileName)
|
| 279 |
-
buffer = ""
|
| 280 |
-
currentLocusTag = ""
|
| 281 |
-
para_dict_key_string = ""
|
| 282 |
-
|
| 283 |
-
while(True): # this loop breaks out when buffer string is empty
|
| 284 |
-
buffer = fileStream.readline()
|
| 285 |
-
|
| 286 |
-
if(buffer.startswith("#")): #skip lines that start with #
|
| 287 |
-
continue
|
| 288 |
-
else:
|
| 289 |
-
if(len(buffer) <= 2): # break out once we reach the end of the file
|
| 290 |
-
break
|
| 291 |
-
|
| 292 |
-
splitLine = buffer[:-1].split("\t")
|
| 293 |
-
|
| 294 |
-
# increment indexNumber when genomic access changes
|
| 295 |
-
if prevGenAccession != splitLine[6] and prevGenAccession != "":
|
| 296 |
-
indexNumber += 1
|
| 297 |
-
|
| 298 |
-
# if parsing on locus_tag, use the locus_tag as the key for the dict
|
| 299 |
-
if self.txtLocusTag:
|
| 300 |
-
currentLocusTag = splitLine[16]
|
| 301 |
-
values = [currentLocusTag, indexNumber, splitLine[0], int(splitLine[7]) - 1, int(splitLine[8]), splitLine[9]]
|
| 302 |
-
|
| 303 |
-
if currentLocusTag not in self.reg_dict:
|
| 304 |
-
self.reg_dict[currentLocusTag] = [values]
|
| 305 |
-
elif currentLocusTag in self.reg_dict:
|
| 306 |
-
self.reg_dict[currentLocusTag].append(values)
|
| 307 |
-
|
| 308 |
-
# if no locus_tag, parse on product_accession, use the product_accession as the key for the dict
|
| 309 |
-
elif not self.txtLocusTag:
|
| 310 |
-
currentLocusTag = splitLine[10]
|
| 311 |
-
values = [currentLocusTag, indexNumber, splitLine[0], int(splitLine[7]) - 1, int(splitLine[8]), splitLine[9]]
|
| 312 |
-
|
| 313 |
-
if currentLocusTag not in self.reg_dict:
|
| 314 |
-
self.reg_dict[currentLocusTag] = [values]
|
| 315 |
-
elif currentLocusTag in self.reg_dict:
|
| 316 |
-
self.reg_dict[currentLocusTag].append(values)
|
| 317 |
-
|
| 318 |
-
if splitLine[13] != '':
|
| 319 |
-
if para_dict_key_string == '':
|
| 320 |
-
para_dict_key_string = splitLine[13] + ';'
|
| 321 |
-
else:
|
| 322 |
-
para_dict_key_string = para_dict_key_string + splitLine[13] + ';'
|
| 323 |
-
|
| 324 |
-
# leaving this in for now, it's related accession
|
| 325 |
-
#if splitLine[12] != '':
|
| 326 |
-
# if para_dict_key_string == '':
|
| 327 |
-
# para_dict_key_string = splitLine[12] + ';'
|
| 328 |
-
# else:
|
| 329 |
-
# para_dict_key_string = para_dict_key_string + splitLine[12] + ';'
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
if splitLine[14] != '':
|
| 333 |
-
if para_dict_key_string == '':
|
| 334 |
-
para_dict_key_string = splitLine[14] + ';'
|
| 335 |
-
else:
|
| 336 |
-
para_dict_key_string = para_dict_key_string + splitLine[14] + ';'
|
| 337 |
-
|
| 338 |
-
para_dict_key_string = para_dict_key_string.replace(',', '')
|
| 339 |
-
# set the parallel dictionary's key string
|
| 340 |
-
#para_dict_key_string = splitLine[13] + ";" + splitLine[12] + ";" + splitLine[14]
|
| 341 |
-
|
| 342 |
-
# if the current line we're on has the data we want for the parellel dictionary, store it
|
| 343 |
-
if len(para_dict_key_string) > 3:
|
| 344 |
-
if para_dict_key_string[len(para_dict_key_string) - 1] == ';':
|
| 345 |
-
para_dict_key_string = para_dict_key_string[0:len(para_dict_key_string) - 1]
|
| 346 |
-
|
| 347 |
-
if para_dict_key_string not in self.para_dict: # make a new input into the dict
|
| 348 |
-
self.para_dict[para_dict_key_string] = [currentLocusTag]
|
| 349 |
-
elif para_dict_key_string in self.para_dict:
|
| 350 |
-
if currentLocusTag not in self.para_dict[para_dict_key_string]:
|
| 351 |
-
# only append it to the dict's list if it isn't currently in there
|
| 352 |
-
self.para_dict[para_dict_key_string].append(currentLocusTag)
|
| 353 |
-
|
| 354 |
-
para_dict_key_string = ""
|
| 355 |
-
prevGenAccession = splitLine[6]
|
| 356 |
-
self.max_chrom = indexNumber
|
| 357 |
-
except Exception as e:
|
| 358 |
-
logger.critical("Error in txt_parse() in annotation parser.")
|
| 359 |
-
logger.critical(e)
|
| 360 |
-
logger.critical(traceback.format_exc())
|
| 361 |
-
msgBox = QtWidgets.QMessageBox()
|
| 362 |
-
msgBox.setStyleSheet("font: " + str(GlobalSettings.mainWindow.fontSize) + "pt 'Arial'")
|
| 363 |
-
msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
|
| 364 |
-
msgBox.setWindowTitle("Fatal Error")
|
| 365 |
-
msgBox.setText("Fatal Error:\n"+str(e)+ "\n\nFor more information on this error, look at CASPER.log in the application folder.")
|
| 366 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
|
| 367 |
-
msgBox.exec()
|
| 368 |
-
|
| 369 |
-
exit(-1)
|
| 370 |
-
|
| 371 |
-
# This function checks to see which file we are parsing
|
| 372 |
-
# It also checks whether to parse based on locus_tag or product accession (txt files only)
|
| 373 |
-
# Then it calls the respective parser functions used
|
| 374 |
-
def find_which_file_version(self):
|
| 375 |
-
try:
|
| 376 |
-
if self.annotationFileName == "" or GlobalSettings.mainWindow.annotation_files.currentText() == "None":
|
| 377 |
-
return -1
|
| 378 |
-
if "gff" in self.annotationFileName:
|
| 379 |
-
### gff file support currently deprecated
|
| 380 |
-
"""
|
| 381 |
-
self.isGff = True
|
| 382 |
-
self.gff_parse()
|
| 383 |
-
"""
|
| 384 |
-
print("Error: Wrong annotation file format")
|
| 385 |
-
return -1
|
| 386 |
-
|
| 387 |
-
elif "feature_table" in self.annotationFileName:
|
| 388 |
-
### feature table file support currently deprecated
|
| 389 |
-
# now that we know it's a txt file and not a gff, check and see if we will be parsing by locus tag or
|
| 390 |
-
# product accession
|
| 391 |
-
"""
|
| 392 |
-
fileStream = open(self.annotationFileName)
|
| 393 |
-
|
| 394 |
-
#skip all of the lines that start with #
|
| 395 |
-
buf = fileStream.readline()
|
| 396 |
-
while buf.startswith("#"):
|
| 397 |
-
buf = fileStream.readline()
|
| 398 |
-
|
| 399 |
-
# split it and see if the locus tag spot has data in it
|
| 400 |
-
split = buf.split("\t")
|
| 401 |
-
if split[16] != "": # if it does, we are parsing based on locus_tag
|
| 402 |
-
self.txtLocusTag = True
|
| 403 |
-
elif split[16] == "": # if not, we are parsing based on product accession
|
| 404 |
-
self.txtLocusTag = False
|
| 405 |
-
fileStream.close()
|
| 406 |
-
self.isTxt = True
|
| 407 |
-
self.txt_parse()
|
| 408 |
-
"""
|
| 409 |
-
print("Error: Wrong annotation file format")
|
| 410 |
-
return -1
|
| 411 |
-
elif "gbff" or "gbk" in self.annotationFileName:
|
| 412 |
-
return "gbff"
|
| 413 |
-
# return -1 to throw the error window in main
|
| 414 |
-
else:
|
| 415 |
-
return -1
|
| 416 |
-
except Exception as e:
|
| 417 |
-
logger.critical("Error in find_which_file_version() in annotation parser.")
|
| 418 |
-
logger.critical(e)
|
| 419 |
-
logger.critical(traceback.format_exc())
|
| 420 |
-
msgBox = QtWidgets.QMessageBox()
|
| 421 |
-
msgBox.setStyleSheet("font: " + str(GlobalSettings.mainWindow.fontSize) + "pt 'Arial'")
|
| 422 |
-
msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
|
| 423 |
-
msgBox.setWindowTitle("Fatal Error")
|
| 424 |
-
msgBox.setText("Fatal Error:\n"+str(e)+ "\n\nFor more information on this error, look at CASPER.log in the application folder.")
|
| 425 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
|
| 426 |
-
msgBox.exec()
|
| 427 |
-
|
| 428 |
-
exit(-1)
|
| 429 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -1,987 +0,0 @@
|
|
| 1 |
-
import platform
|
| 2 |
-
# import controllers.ncbi as ncbi
|
| 3 |
-
import os
|
| 4 |
-
# from utils.Algorithms import get_table_headers
|
| 5 |
-
# from models.CSPRparser import CSPRparser
|
| 6 |
-
import glob
|
| 7 |
-
import models.GlobalSettings as GlobalSettings
|
| 8 |
-
from PyQt6 import QtWidgets, QtGui, QtCore, uic, QtGui
|
| 9 |
-
from utils.ui import scale_ui, center_ui, show_message, show_error
|
| 10 |
-
# from views.annotation_functions import *
|
| 11 |
-
# from views.AnnotationParser import Annotation_Parser
|
| 12 |
-
# from views.AnnotationWindow import AnnotationWindow
|
| 13 |
-
# import views.genomeBrowser as genomeBrowser
|
| 14 |
-
# from views.NewGenome import NewGenome
|
| 15 |
-
# from views.NewEndonuclease import NewEndonuclease
|
| 16 |
-
# from controllers.CoTargeting import CoTargeting
|
| 17 |
-
# from views.generateLib import genLibrary
|
| 18 |
-
# from controllers.Results import Results
|
| 19 |
-
# from views.export_tool import export_tool
|
| 20 |
-
# from views.closingWin import closingWindow
|
| 21 |
-
# from utils.web import ncbi_page, repo_page, ncbi_blast_page
|
| 22 |
-
# from controllers.populate_fna_files import PopulateFNAFiles
|
| 23 |
-
|
| 24 |
-
# logger = GlobalSettings.logger
|
| 25 |
-
|
| 26 |
-
fontSize = 12
|
| 27 |
-
|
| 28 |
-
class CMainWindow(QtWidgets.QMainWindow):
|
| 29 |
-
def __init__(self, settings):
|
| 30 |
-
try:
|
| 31 |
-
super(CMainWindow, self).__init__()
|
| 32 |
-
# uic.loadUi(os.path.join(self.settings.get_ui_dir(), 'startupCASPER.ui'), self)
|
| 33 |
-
# uic.loadUi(GlobalSettings.appdir + 'ui/CASPER_main.ui', self)
|
| 34 |
-
# print("path: ", GlobalSettings.appdir + 'ui/CASPER_main_copy_2.ui')
|
| 35 |
-
# uic.loadUi(GlobalSettings.appdir + 'ui/CASPER_main_copy_2.ui', self)
|
| 36 |
-
self.settings = settings
|
| 37 |
-
print("path: ", os.path.join(self.settings.get_ui_dir(), 'CASPER_main.ui'))
|
| 38 |
-
uic.loadUi(os.path.join(self.settings.get_ui_dir(), 'CASPER_main.ui'), self)
|
| 39 |
-
self.setWindowTitle("CASPER")
|
| 40 |
-
self.setWindowIcon(QtGui.QIcon(os.path.join(self.settings.get_assets_dir(), "cas9image.ico")))
|
| 41 |
-
|
| 42 |
-
# self.dbpath = ""
|
| 43 |
-
# self.inputstring = "" # This is the search string
|
| 44 |
-
# # self.info_path = settings.get_app_dir()
|
| 45 |
-
# # info_path = settings.get_app_dir()
|
| 46 |
-
# self.anno_name = ""
|
| 47 |
-
# self.endo_name = ""
|
| 48 |
-
# self.fontSize = 12
|
| 49 |
-
# self.org = ""
|
| 50 |
-
# self.TNumbers = {} # the T numbers from a kegg search
|
| 51 |
-
# self.orgcodes = {} # Stores the Kegg organism code by the format {full name : organism code}
|
| 52 |
-
# self.gene_list = {} # list of genes (no ides what they pertain to
|
| 53 |
-
# self.searches = {}
|
| 54 |
-
# self.checkBoxes = []
|
| 55 |
-
# self.genlib_list = [] # This list stores selected SeqFeatures from annotation window
|
| 56 |
-
# self.checked_info = {}
|
| 57 |
-
# self.check_ntseq_info = {} # the ntsequences that go along with the checked_info
|
| 58 |
-
# self.annotation_parser = Annotation_Parser()
|
| 59 |
-
# self.link_list = list() # the list of the downloadable links from the NCBI search
|
| 60 |
-
# self.organismDict = dict() # the dictionary for the links to download. Key is the description of the organism, value is the ID that can be found in link_list
|
| 61 |
-
# self.results_list = list()
|
| 62 |
-
# self.organismData = list()
|
| 63 |
-
# self.ncbi = ncbi.NCBI_search_tool()
|
| 64 |
-
|
| 65 |
-
# groupbox_style = """
|
| 66 |
-
# QGroupBox:title{subcontrol-origin: margin;
|
| 67 |
-
# left: 10px;
|
| 68 |
-
# padding: 0 5px 0 5px;}
|
| 69 |
-
# QGroupBox#Step1{border: 2px solid rgb(111,181,110);
|
| 70 |
-
# border-radius: 9px;
|
| 71 |
-
# margin-top: 10px;
|
| 72 |
-
# font: bold 14pt 'Arial';}
|
| 73 |
-
# """
|
| 74 |
-
|
| 75 |
-
# self.Step1.setStyleSheet(groupbox_style)
|
| 76 |
-
# self.Step2.setStyleSheet(groupbox_style.replace("Step1", "Step2"))
|
| 77 |
-
# self.Step3.setStyleSheet(groupbox_style.replace("Step1", "Step3"))
|
| 78 |
-
# self.CASPER_Navigation.setStyleSheet(groupbox_style.replace("Step1", "CASPER_Navigation").replace("solid","dashed").replace("rgb(111,181,110)","rgb(88,89,91)"))
|
| 79 |
-
|
| 80 |
-
# self.setWindowIcon(QtGui.QIcon(GlobalSettings.appdir + "cas9image.ico"))
|
| 81 |
-
# self.pushButton_FindTargets.clicked.connect(self.gather_settings)
|
| 82 |
-
# self.pushButton_ViewTargets.clicked.connect(self.view_results)
|
| 83 |
-
# self.pushButton_ViewTargets.setEnabled(False)
|
| 84 |
-
# self.GenerateLibrary.setEnabled(False)
|
| 85 |
-
# self.radioButton_Gene.clicked.connect(self.toggle_annotation)
|
| 86 |
-
# self.radioButton_Position.clicked.connect(self.toggle_annotation)
|
| 87 |
-
|
| 88 |
-
""" Connect functions to buttons """
|
| 89 |
-
# self.newGenome_button.clicked.connect(self.launch_newGenome) # Connect launch function to New Genome
|
| 90 |
-
# self.newEndo_button.clicked.connect(self.launch_newEndonuclease) # Connect launch function to New Endonuclease
|
| 91 |
-
# self.multitargeting_button.clicked.connect(self.changeto_multitargeting) # Connect launch function to Multitargeting
|
| 92 |
-
# self.populationAnalysis_button.clicked.connect(self.changeto_population_Analysis) # Connect launch function to PA
|
| 93 |
-
# self.GenerateLibrary.clicked.connect(self.prep_genlib)
|
| 94 |
-
# self.combineFiles_button.clicked.connect(self.launch_populate_fna_files)
|
| 95 |
-
|
| 96 |
-
""" Connect functions to actions (menu bar) """
|
| 97 |
-
# self.actionOpen_Genome_Browser.triggered.connect(self.launch_newGenomeBrowser)
|
| 98 |
-
# self.actionExit.triggered.connect(self.close_app)
|
| 99 |
-
# self.visit_repo.triggered.connect(repo_page)
|
| 100 |
-
# self.actionChange_Directory.triggered.connect(self.change_directory)
|
| 101 |
-
# self.actionNCBI.triggered.connect(ncbi_page)
|
| 102 |
-
# self.actionCasper2.triggered.connect(self.open_casper2_web_page)
|
| 103 |
-
# self.actionNCBI_BLAST.triggered.connect(ncbi_blast_page)
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
# self.progressBar.setMinimum(0)
|
| 108 |
-
# self.progressBar.setMaximum(100)
|
| 109 |
-
# self.progressBar.reset()
|
| 110 |
-
# self.Annotation_Window = AnnotationWindow(info_path)
|
| 111 |
-
# self.geneEntryField.setPlaceholderText("Example Inputs: \n\n"
|
| 112 |
-
# "Option 1: Feature (ID, Locus Tag, or Name)\n"
|
| 113 |
-
# "Example: 854068/YOL086C/ADH1 for S. cerevisiae alcohol dehydrogenase 1\n\n"
|
| 114 |
-
# "Option 2: Position (chromosome,start,stop)\n"
|
| 115 |
-
# "Example: 1,1,1000 for targeting chromosome 1, base pairs 1 to 1000\n\n"
|
| 116 |
-
# "Option 3: Sequence (must be within the selected organism)\n"
|
| 117 |
-
# "Example: Any nucleotide sequence between 100 and 10,000 base pairs.\n\n"
|
| 118 |
-
# "*Note: to multiplex, separate multiple queries by new lines*\n"
|
| 119 |
-
# "Example:\n"
|
| 120 |
-
# "1,1,1000\n"
|
| 121 |
-
# "5,1,500\n"
|
| 122 |
-
# "etc.")
|
| 123 |
-
|
| 124 |
-
# show functionalities on window
|
| 125 |
-
self.populate_fna_files = None
|
| 126 |
-
self._new_genome = None
|
| 127 |
-
# self.newEndonuclease = NewEndonuclease()
|
| 128 |
-
# self.CoTargeting = CoTargeting(info_path)
|
| 129 |
-
# self.Results = Results()
|
| 130 |
-
# self.export_tool_window = export_tool()
|
| 131 |
-
# self.genLib = genLibrary()
|
| 132 |
-
# self.myClosingWindow = closingWindow()
|
| 133 |
-
# self.genomebrowser = genomeBrowser.genomebrowser()
|
| 134 |
-
# self.launch_ncbi_button.clicked.connect(self.launch_ncbi)
|
| 135 |
-
|
| 136 |
-
# self.first_show = True
|
| 137 |
-
scale_ui(self, custom_scale_width=1150, custom_scale_height=650)
|
| 138 |
-
# self.show()
|
| 139 |
-
# self.load_dropdown_data()
|
| 140 |
-
print("MainWindow initialized")
|
| 141 |
-
except Exception as e:
|
| 142 |
-
show_error("Error in __init__() in main", e)
|
| 143 |
-
|
| 144 |
-
# def get_populate_fna_files(self):
|
| 145 |
-
# if self.populate_fna_files is None:
|
| 146 |
-
# self.populate_fna_files = PopulateFNAFiles(GlobalSettings.GlobalSettings1(GlobalSettings.appdir))
|
| 147 |
-
# return self.populate_fna_files
|
| 148 |
-
|
| 149 |
-
# def launch_populate_fna_files(self):
|
| 150 |
-
# self.get_populate_fna_files().show() # Ensure the window is shown
|
| 151 |
-
|
| 152 |
-
# this function prepares everything for the generate library function
|
| 153 |
-
# it is very similar to the gather settings, how ever it stores the data instead of calling the Annotation Window class
|
| 154 |
-
# it moves the data onto the generateLib function, and then opens that window
|
| 155 |
-
# def prep_genlib(self):
|
| 156 |
-
# # make sure the user actually inputs something
|
| 157 |
-
# try:
|
| 158 |
-
# inputstring = str(self.geneEntryField.toPlainText())
|
| 159 |
-
# if (inputstring.startswith("Example Inputs:") or inputstring == ""):
|
| 160 |
-
# show_message(
|
| 161 |
-
# fontSize=12,
|
| 162 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 163 |
-
# title="Error",
|
| 164 |
-
# message="No gene has been entered. Please enter a gene.",
|
| 165 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 166 |
-
# )
|
| 167 |
-
# return
|
| 168 |
-
# else:
|
| 169 |
-
# # standardize the input
|
| 170 |
-
# inputstring = inputstring.lower()
|
| 171 |
-
# found_matches_bool = True
|
| 172 |
-
# # call the respective function
|
| 173 |
-
# self.progressBar.setValue(10)
|
| 174 |
-
# if self.radioButton_Gene.isChecked():
|
| 175 |
-
# if len(self.genlib_list) > 0:
|
| 176 |
-
# found_matches_bool = True
|
| 177 |
-
# else:
|
| 178 |
-
# found_matches_bool = False
|
| 179 |
-
# elif self.radioButton_Position.isChecked() or self.radioButton_Sequence.isChecked():
|
| 180 |
-
# show_message(
|
| 181 |
-
# fontSize=12,
|
| 182 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 183 |
-
# title="Error",
|
| 184 |
-
# message="Generate Library can only work with feature searches.",
|
| 185 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 186 |
-
# )
|
| 187 |
-
# return
|
| 188 |
-
# """
|
| 189 |
-
# elif self.radioButton_Position.isChecked():
|
| 190 |
-
# pinput = inputstring.split(';')
|
| 191 |
-
# found_matches_bool = self.run_results("position", pinput,openAnnoWindow=False)
|
| 192 |
-
# elif self.radioButton_Sequence.isChecked():
|
| 193 |
-
# sinput = inputstring
|
| 194 |
-
# found_matches_bool = self.run_results("sequence", sinput, openAnnoWindow=False)
|
| 195 |
-
# """
|
| 196 |
-
# # if matches are found
|
| 197 |
-
# if found_matches_bool == True:
|
| 198 |
-
# # get the cspr file name
|
| 199 |
-
# cspr_file = self.organisms_to_files[self.orgChoice.currentText()][self.endoChoice.currentText()][0]
|
| 200 |
-
# if platform.system() == 'Windows':
|
| 201 |
-
# cspr_file = GlobalSettings.CSPR_DB + '\\' + cspr_file
|
| 202 |
-
# else:
|
| 203 |
-
# cspr_file = GlobalSettings.CSPR_DB + '/' + cspr_file
|
| 204 |
-
# kegg_non = 'non_kegg'
|
| 205 |
-
|
| 206 |
-
# # launch generateLib
|
| 207 |
-
# self.progressBar.setValue(100)
|
| 208 |
-
|
| 209 |
-
# # calculate the total number of matches found
|
| 210 |
-
# tempSum = len(self.genlib_list)
|
| 211 |
-
|
| 212 |
-
# # warn the user if the number is greater than 50
|
| 213 |
-
# if tempSum > 50:
|
| 214 |
-
# msgBox = QtWidgets.QMessageBox()
|
| 215 |
-
# msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
|
| 216 |
-
# msgBox.setIcon(QtWidgets.QMessageBox.Icon.Question)
|
| 217 |
-
# msgBox.setWindowTitle("Many Matches Found")
|
| 218 |
-
# msgBox.setText("More than 50 matches have been found. Continuing could cause a slow down...\n\n Do you wish to continue?")
|
| 219 |
-
# msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Yes)
|
| 220 |
-
# msgBox.addButton(QtWidgets.QMessageBox.StandardButton.No)
|
| 221 |
-
# msgBox.exec()
|
| 222 |
-
|
| 223 |
-
# if (msgBox.result() == QtWidgets.QMessageBox.No):
|
| 224 |
-
# self.searches.clear()
|
| 225 |
-
# self.progressBar.setValue(0)
|
| 226 |
-
# return -2
|
| 227 |
-
|
| 228 |
-
# self.genLib.launch(self.genlib_list,cspr_file, kegg_non)
|
| 229 |
-
# else:
|
| 230 |
-
# self.progressBar.setValue(0)
|
| 231 |
-
# except Exception as e:
|
| 232 |
-
# show_error("Error in prep_genlib() in main", e)
|
| 233 |
-
|
| 234 |
-
# # Function for collecting the settings from the input field and transferring them to run_results
|
| 235 |
-
# def gather_settings(self):
|
| 236 |
-
# try:
|
| 237 |
-
# ### If user searches multiple times for the same thing, this avoids re-searching the entire annotation file
|
| 238 |
-
# check_org = self.orgChoice.currentText().lower()
|
| 239 |
-
# check_endo = self.endoChoice.currentText().lower()
|
| 240 |
-
# check_anno_name = self.annotation_files.currentText().lower()
|
| 241 |
-
# check_input = str(self.geneEntryField.toPlainText()).lower()
|
| 242 |
-
# if (check_input == self.inputstring and check_org == self.org and check_anno_name == self.anno_name and check_endo == self.endo_name):
|
| 243 |
-
# same_search = True
|
| 244 |
-
# else:
|
| 245 |
-
# self.org = check_org
|
| 246 |
-
# self.anno_name = check_anno_name
|
| 247 |
-
# self.inputstring = check_input
|
| 248 |
-
# self.endo_name = check_endo
|
| 249 |
-
# same_search = False
|
| 250 |
-
|
| 251 |
-
# # Error check: make sure the user actually inputs something
|
| 252 |
-
# if (self.inputstring.startswith("Example Inputs:") or self.inputstring == ""):
|
| 253 |
-
# show_message(
|
| 254 |
-
# fontSize=12,
|
| 255 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 256 |
-
# title="Error",
|
| 257 |
-
# message="No feature has been searched for. Please enter a search.",
|
| 258 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 259 |
-
# )
|
| 260 |
-
# return
|
| 261 |
-
# else:
|
| 262 |
-
|
| 263 |
-
# ### Remove additional scoring columns if necessary
|
| 264 |
-
# header = get_table_headers(self.Results.targetTable) # Returns headers of the target table in View Targets window
|
| 265 |
-
# col_indices = [header.index(x) for x in GlobalSettings.algorithms if x in header] # Returns the index(es) of the alternative scoring column(s) in the target table of View Targets window
|
| 266 |
-
# if len(col_indices) > 0: # If alternative scoring has been done
|
| 267 |
-
# for i in col_indices:
|
| 268 |
-
# self.Results.targetTable.removeColumn(i)
|
| 269 |
-
# self.Results.targetTable.resizeColumnsToContents()
|
| 270 |
-
|
| 271 |
-
# self.progressBar.setValue(10)
|
| 272 |
-
# if self.radioButton_Gene.isChecked():
|
| 273 |
-
# ginput = [x.strip() for x in self.inputstring.split('\n')] # Split search based on newline character and remove deadspace
|
| 274 |
-
# self.run_results("feature", ginput, same_search)
|
| 275 |
-
# elif self.radioButton_Position.isChecked():
|
| 276 |
-
# pinput = [x.strip() for x in self.inputstring.split('\n')] # Split search based on newline character and remove deadspace
|
| 277 |
-
# self.run_results("position", pinput, same_search)
|
| 278 |
-
# elif self.radioButton_Sequence.isChecked():
|
| 279 |
-
# sinput = self.inputstring
|
| 280 |
-
# self.run_results("sequence", sinput, same_search)
|
| 281 |
-
# except Exception as e:
|
| 282 |
-
# show_error("Error in gather_settings() in main", e)
|
| 283 |
-
|
| 284 |
-
# # ---- Following functions are for running the auxillary algorithms and windows ---- #
|
| 285 |
-
# # this function is parses the annotation file given, and then goes through and goes onto results
|
| 286 |
-
# # it will call other versions of collect_table_data and fill_table that work with these file types
|
| 287 |
-
# # this function should work with the any type of annotation file, besides kegg.
|
| 288 |
-
# # this assumes that the parsers all store the data the same way, which gff and feature table do
|
| 289 |
-
# # please make sure the genbank parser stores the data in the same way
|
| 290 |
-
# # so far the gff files seems to all be different. Need to think about how we want to parse it
|
| 291 |
-
# def run_results_own_ncbi_file(self, inputstring, fileName, same_search, openAnnoWindow=True):
|
| 292 |
-
# try:
|
| 293 |
-
# self.set_progress(35)
|
| 294 |
-
# self.results_list = self.annotation_parser.genbank_search(inputstring, same_search)
|
| 295 |
-
|
| 296 |
-
# cspr_file = self.organisms_to_files[self.orgChoice.currentText()][self.endoChoice.currentText()][0]
|
| 297 |
-
# cspr_file = os.path.join(GlobalSettings.CSPR_DB, cspr_file)
|
| 298 |
-
|
| 299 |
-
# own_cspr_parser = CSPRparser(cspr_file)
|
| 300 |
-
# own_cspr_parser.read_first_lines()
|
| 301 |
-
# if len(own_cspr_parser.karystatsList) != self.annotation_parser.max_chrom:
|
| 302 |
-
# show_message(
|
| 303 |
-
# fontSize=12,
|
| 304 |
-
# icon=QtWidgets.QMessageBox.Icon.Warning,
|
| 305 |
-
# title="Warning:",
|
| 306 |
-
# message="The number of chromosomes do not match. This could cause errors.",
|
| 307 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 308 |
-
# )
|
| 309 |
-
# self.set_progress(60)
|
| 310 |
-
|
| 311 |
-
# self.searches.clear()
|
| 312 |
-
|
| 313 |
-
# self.set_progress(75)
|
| 314 |
-
# if not self.results_list:
|
| 315 |
-
# show_message(
|
| 316 |
-
# fontSize=12,
|
| 317 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 318 |
-
# title="No Matches Found",
|
| 319 |
-
# message="No matches found with that search, please try again.",
|
| 320 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 321 |
-
# )
|
| 322 |
-
# self.set_progress(0)
|
| 323 |
-
# return False if not openAnnoWindow else None
|
| 324 |
-
|
| 325 |
-
# self.set_progress(80)
|
| 326 |
-
|
| 327 |
-
# return self.Annotation_Window.fill_table_nonKegg(self, self.results_list) if openAnnoWindow else True
|
| 328 |
-
# except Exception as e:
|
| 329 |
-
# show_error(f"Error in run_results_own_ncbi_file() in main.", e)
|
| 330 |
-
|
| 331 |
-
# def set_progress(self, value):
|
| 332 |
-
# self.progressBar.setValue(value)
|
| 333 |
-
|
| 334 |
-
# def run_results(self, inputtype, inputstring, same_search, openAnnoWindow=True):
|
| 335 |
-
# try:
|
| 336 |
-
# file_name = self.annotation_files.currentText()
|
| 337 |
-
# for file in glob.glob(GlobalSettings.CSPR_DB + "/**/*.gb*", recursive=True):
|
| 338 |
-
# if file_name in file:
|
| 339 |
-
# self.annotation_parser.annotationFileName = file
|
| 340 |
-
# break
|
| 341 |
-
# self.Results.annotation_path = self.annotation_parser.annotationFileName
|
| 342 |
-
|
| 343 |
-
# progvalue = 15
|
| 344 |
-
# self.searches = {}
|
| 345 |
-
# self.gene_list = {}
|
| 346 |
-
# self.progressBar.setValue(progvalue)
|
| 347 |
-
|
| 348 |
-
# try:
|
| 349 |
-
# self.Results.endonucleaseBox.currentIndexChanged.disconnect()
|
| 350 |
-
# except Exception as e:
|
| 351 |
-
# pass
|
| 352 |
-
# # set Results endo combo box
|
| 353 |
-
# self.Results.endonucleaseBox.clear()
|
| 354 |
-
|
| 355 |
-
# # set the results window endoChoice box menu
|
| 356 |
-
# # set the mainWindow's endoChoice first, and then loop through and set the rest of them
|
| 357 |
-
# self.Results.endonucleaseBox.addItem(self.endoChoice.currentText())
|
| 358 |
-
# for item in self.organisms_to_endos[str(self.orgChoice.currentText())]:
|
| 359 |
-
# if item != self.Results.endonucleaseBox.currentText():
|
| 360 |
-
# self.Results.endonucleaseBox.addItem(item)
|
| 361 |
-
|
| 362 |
-
# self.Results.endonucleaseBox.currentIndexChanged.connect(self.Results.changeEndonuclease)
|
| 363 |
-
# self.Results.get_endo_data()
|
| 364 |
-
|
| 365 |
-
# # self.Results.change_start_end_button.setEnabled(False)
|
| 366 |
-
# self.Results.displayGeneViewer.setChecked(0)
|
| 367 |
-
|
| 368 |
-
# if inputtype == "feature":
|
| 369 |
-
# fileType = self.annotation_parser.find_which_file_version()
|
| 370 |
-
|
| 371 |
-
# # if the parser retuns the 'wrong file type' error
|
| 372 |
-
# if fileType == -1:
|
| 373 |
-
# show_message(
|
| 374 |
-
# fontSize=12,
|
| 375 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 376 |
-
# title="Error",
|
| 377 |
-
# message="Feature search requires a GenBank formatted annotation file. Please select a file from the dropdown menu or search by position",
|
| 378 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 379 |
-
# )
|
| 380 |
-
# self.progressBar.setValue(0)
|
| 381 |
-
# return
|
| 382 |
-
|
| 383 |
-
# # make sure an annotation file has been selected
|
| 384 |
-
# if self.annotation_files.currentText() == "None":
|
| 385 |
-
# show_message(
|
| 386 |
-
# fontSize=12,
|
| 387 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 388 |
-
# title="Error",
|
| 389 |
-
# message="Search by feature requires a GenBank annotation file. Please select one from the dropdown menu or search by position.",
|
| 390 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 391 |
-
# )
|
| 392 |
-
# self.progressBar.setValue(0)
|
| 393 |
-
# return
|
| 394 |
-
|
| 395 |
-
# # this now just goes onto the other version of run_results
|
| 396 |
-
# myBool = self.run_results_own_ncbi_file(inputstring, self.annotation_files.currentText(), same_search, openAnnoWindow=openAnnoWindow)
|
| 397 |
-
# if not openAnnoWindow:
|
| 398 |
-
# return myBool
|
| 399 |
-
# else:
|
| 400 |
-
# self.progressBar.setValue(0)
|
| 401 |
-
# return
|
| 402 |
-
|
| 403 |
-
# if inputtype == "position":
|
| 404 |
-
# full_org = str(self.orgChoice.currentText())
|
| 405 |
-
# self.checked_info.clear()
|
| 406 |
-
# self.check_ntseq_info.clear()
|
| 407 |
-
|
| 408 |
-
# for item in inputstring: # Loop through each search
|
| 409 |
-
# searchIndices = [x.strip() for x in item.split(',')] # Parse input query
|
| 410 |
-
|
| 411 |
-
# if len(searchIndices) != 3:
|
| 412 |
-
# show_message(
|
| 413 |
-
# fontSize=12,
|
| 414 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 415 |
-
# title="Error",
|
| 416 |
-
# message="There are 3 arguments required for this function: chromosome, start position, and end position.",
|
| 417 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 418 |
-
# )
|
| 419 |
-
# self.progressBar.setValue(0)
|
| 420 |
-
# return
|
| 421 |
-
|
| 422 |
-
# if not searchIndices[0].isdigit() or not searchIndices[1].isdigit() or not searchIndices[2].isdigit():
|
| 423 |
-
# show_message(
|
| 424 |
-
# fontSize=12,
|
| 425 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 426 |
-
# title="Error",
|
| 427 |
-
# message="The positions given must be integers. Please try again.",
|
| 428 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 429 |
-
# )
|
| 430 |
-
# self.progressBar.setValue(0)
|
| 431 |
-
# return
|
| 432 |
-
# elif int(searchIndices[1]) >= int(searchIndices[2]):
|
| 433 |
-
# show_message(
|
| 434 |
-
# fontSize=12,
|
| 435 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 436 |
-
# title="Error",
|
| 437 |
-
# message="The start index must be less than the end index.",
|
| 438 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 439 |
-
# )
|
| 440 |
-
# self.progressBar.setValue(0)
|
| 441 |
-
# return
|
| 442 |
-
# elif abs(int(searchIndices[2])-int(searchIndices[1])) > 50000:
|
| 443 |
-
# show_message(
|
| 444 |
-
# fontSize=12,
|
| 445 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 446 |
-
# title="Error",
|
| 447 |
-
# message="The search range must be less than 50,000 nt.",
|
| 448 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 449 |
-
# )
|
| 450 |
-
# self.progressBar.setValue(0)
|
| 451 |
-
# return
|
| 452 |
-
# elif int(searchIndices[0]) > self.annotation_parser.get_max_chrom():
|
| 453 |
-
# show_message(
|
| 454 |
-
# fontSize=12,
|
| 455 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 456 |
-
# title="Error",
|
| 457 |
-
# message="Chromosome %s does not exist in the selected annotation file." % searchIndices[0],
|
| 458 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 459 |
-
# )
|
| 460 |
-
# self.progressBar.setValue(0)
|
| 461 |
-
# return
|
| 462 |
-
# # append the data into the checked_info
|
| 463 |
-
# tempString = 'chrom: ' + str(searchIndices[0]) + ',start: ' + str(searchIndices[1]) + ',end: ' + str(searchIndices[2])
|
| 464 |
-
# self.checked_info[tempString] = (int(searchIndices[0]), int(searchIndices[1])-1, int(searchIndices[2]))
|
| 465 |
-
|
| 466 |
-
# self.progressBar.setValue(50)
|
| 467 |
-
# self.Results.transfer_data(full_org, self.organisms_to_files[full_org], [str(self.endoChoice.currentText())], os.getcwd(), self.checked_info, self.check_ntseq_info,inputtype)
|
| 468 |
-
# self.Results.load_gene_viewer()
|
| 469 |
-
# self.progressBar.setValue(100)
|
| 470 |
-
# self.pushButton_ViewTargets.setEnabled(True)
|
| 471 |
-
# self.GenerateLibrary.setEnabled(True)
|
| 472 |
-
|
| 473 |
-
# if inputtype == "sequence":
|
| 474 |
-
# fileType = self.annotation_parser.find_which_file_version()
|
| 475 |
-
|
| 476 |
-
# if fileType == -1:
|
| 477 |
-
# show_message(
|
| 478 |
-
# fontSize=12,
|
| 479 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 480 |
-
# title="Error",
|
| 481 |
-
# message="Search by sequence requires a GenBank annotation file. Please select one from the dropdown menu or search by position.",
|
| 482 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 483 |
-
# )
|
| 484 |
-
# self.progressBar.setValue(0)
|
| 485 |
-
# return
|
| 486 |
-
# if self.annotation_files.currentText() == "None":
|
| 487 |
-
# show_message(
|
| 488 |
-
# fontSize=12,
|
| 489 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 490 |
-
# title="Error",
|
| 491 |
-
# message="Search by sequence requires a GenBank annotation file. Please select one from the dropdown menu or search by position.",
|
| 492 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 493 |
-
# )
|
| 494 |
-
# self.progressBar.setValue(0)
|
| 495 |
-
# return
|
| 496 |
-
|
| 497 |
-
# checkString = 'AGTCN'
|
| 498 |
-
# full_org = str(self.orgChoice.currentText())
|
| 499 |
-
# self.checked_info.clear()
|
| 500 |
-
# self.progressBar.setValue(10)
|
| 501 |
-
# inputstring = inputstring.replace('\n','').upper().strip()
|
| 502 |
-
|
| 503 |
-
# for letter in inputstring:
|
| 504 |
-
# if letter not in checkString:
|
| 505 |
-
# show_message(
|
| 506 |
-
# fontSize=12,
|
| 507 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 508 |
-
# title="Error",
|
| 509 |
-
# message="The sequence must consist of A, G, T, C, or N. No other characters are allowed.",
|
| 510 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 511 |
-
# )
|
| 512 |
-
# self.progressBar.setValue(0)
|
| 513 |
-
# return
|
| 514 |
-
|
| 515 |
-
# if len(inputstring) < 100:
|
| 516 |
-
# show_message(
|
| 517 |
-
# fontSize=12,
|
| 518 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 519 |
-
# title="Error",
|
| 520 |
-
# message="The sequence given is too small. At least 100 characters are required.",
|
| 521 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 522 |
-
# )
|
| 523 |
-
# self.progressBar.setValue(0)
|
| 524 |
-
# return
|
| 525 |
-
|
| 526 |
-
# if len(inputstring) > 10000:
|
| 527 |
-
# show_message(
|
| 528 |
-
# fontSize=12,
|
| 529 |
-
# icon=QtWidgets.QMessageBox.Icon.Question,
|
| 530 |
-
# title="Large Sequence Detected",
|
| 531 |
-
# message="The sequence given is too large one.\n\nPlease input a sequence less than 10kb in length.",
|
| 532 |
-
# button=QtWidgets.QMessageBox.StandardButton.Yes
|
| 533 |
-
# )
|
| 534 |
-
# self.progressBar.setValue(0)
|
| 535 |
-
# return
|
| 536 |
-
|
| 537 |
-
# self.progressBar.setValue(30)
|
| 538 |
-
|
| 539 |
-
# # Check the GBFF file for the sequence
|
| 540 |
-
# my_check = self.annotation_parser.get_sequence_info(inputstring)
|
| 541 |
-
|
| 542 |
-
# self.progressBar.setValue(55) # Update progress bar
|
| 543 |
-
|
| 544 |
-
# if type(my_check) == bool:
|
| 545 |
-
# show_message(
|
| 546 |
-
# fontSize=12,
|
| 547 |
-
# icon=QtWidgets.QMessageBox.Icon.Question,
|
| 548 |
-
# title="Sequence Not Found",
|
| 549 |
-
# message="The sequence entered was not found.\n\nPlease input a sequence that is in the selected organism.",
|
| 550 |
-
# button=QtWidgets.QMessageBox.StandardButton.Yes
|
| 551 |
-
# )
|
| 552 |
-
# self.progressBar.setValue(0)
|
| 553 |
-
# return
|
| 554 |
-
|
| 555 |
-
# else:
|
| 556 |
-
# tempString = 'chrom: ' + str(my_check[0]) + ',start: ' + str(my_check[1]) + ',end: ' + str(my_check[2])
|
| 557 |
-
# self.checked_info[tempString] = (int(my_check[0]), int(my_check[1])-1, int(my_check[2]))
|
| 558 |
-
|
| 559 |
-
# self.progressBar.setValue(75)
|
| 560 |
-
|
| 561 |
-
# self.Results.transfer_data(full_org, self.organisms_to_files[full_org], [str(self.endoChoice.currentText())], os.getcwd(), self.checked_info, self.check_ntseq_info, inputtype)
|
| 562 |
-
# self.Results.load_gene_viewer()
|
| 563 |
-
# self.progressBar.setValue(100)
|
| 564 |
-
# self.pushButton_ViewTargets.setEnabled(True)
|
| 565 |
-
# self.GenerateLibrary.setEnabled(True)
|
| 566 |
-
# except Exception as e:
|
| 567 |
-
# show_error("Error in run_results() in main", e)
|
| 568 |
-
|
| 569 |
-
# def handle_feature_search(self, input_string, open_anno_window):
|
| 570 |
-
# file_type = self.annotation_parser.find_which_file_version()
|
| 571 |
-
# if file_type == -1 or self.annotation_files.currentText() == "None":
|
| 572 |
-
# self.show_error_message("Feature search requires a GenBank formatted annotation file.")
|
| 573 |
-
# return False
|
| 574 |
-
|
| 575 |
-
# return self.run_results_own_ncbi_file(input_string, self.annotation_files.currentText(), same_search, open_anno_window)
|
| 576 |
-
|
| 577 |
-
# def launch_newGenome(self):
|
| 578 |
-
# try:
|
| 579 |
-
# # Update endo list
|
| 580 |
-
# self.get_new_genome().fillEndo()
|
| 581 |
-
# if self.get_new_genome().first_show:
|
| 582 |
-
# center_ui(self.get_new_genome())
|
| 583 |
-
# self.get_new_genome().first_show = False
|
| 584 |
-
# self.hide()
|
| 585 |
-
# self.get_new_genome().show()
|
| 586 |
-
# except Exception as e:
|
| 587 |
-
# show_error("Error in launch_newGenome() in main", e)
|
| 588 |
-
|
| 589 |
-
# def launch_newEndonuclease(self):
|
| 590 |
-
# try:
|
| 591 |
-
# center_ui(self.newEndonuclease)
|
| 592 |
-
# self.newEndonuclease.show()
|
| 593 |
-
# self.newEndonuclease.activateWindow()
|
| 594 |
-
# except Exception as e:
|
| 595 |
-
# show_error("Error in launch_newEndonuclease() in main", e)
|
| 596 |
-
|
| 597 |
-
# #launch genome browser tool
|
| 598 |
-
# def launch_newGenomeBrowser(self):
|
| 599 |
-
# try:
|
| 600 |
-
# self.genomebrowser.createGraph(self)
|
| 601 |
-
# except Exception as e:
|
| 602 |
-
# show_error("Error in launch_newGenomeBrowser() in main", e)
|
| 603 |
-
|
| 604 |
-
# def launch_ncbi(self):
|
| 605 |
-
# try:
|
| 606 |
-
# show_message(
|
| 607 |
-
# fontSize=12,
|
| 608 |
-
# icon=QtWidgets.QMessageBox.Icon.Information,
|
| 609 |
-
# title="Note:",
|
| 610 |
-
# message="NCBI Annotation Guidelines:\n\nDownload annotation files of the exact species and strain used in Analyze New Genome.\n\nMismatched annotation files will inhibit downstream analyses.",
|
| 611 |
-
# button=QtWidgets.QMessageBox.StandardButton.Ok
|
| 612 |
-
# )
|
| 613 |
-
# if self.ncbi.first_show:
|
| 614 |
-
# self.ncbi.first_show = False
|
| 615 |
-
# center_ui(self.ncbi)
|
| 616 |
-
|
| 617 |
-
# self.ncbi.show()
|
| 618 |
-
# self.ncbi.activateWindow()
|
| 619 |
-
# except Exception as e:
|
| 620 |
-
# show_error("launch_ncbi() in main", e)
|
| 621 |
-
|
| 622 |
-
# # this function does the same stuff that the other collect_table_data does, but works with the other types of files
|
| 623 |
-
# def collect_table_data_nonkegg(self):
|
| 624 |
-
# try:
|
| 625 |
-
# # start out the same as the other collect_table_data
|
| 626 |
-
# self.checked_info.clear()
|
| 627 |
-
# self.genlib_list.clear()
|
| 628 |
-
# self.check_ntseq_info.clear()
|
| 629 |
-
# full_org = str(self.orgChoice.currentText())
|
| 630 |
-
# holder = ()
|
| 631 |
-
# selected_indices = []
|
| 632 |
-
# selected_rows = self.Annotation_Window.tableWidget.selectionModel().selectedRows()
|
| 633 |
-
# for ind in sorted(selected_rows):
|
| 634 |
-
# selected_indices.append(ind.row())
|
| 635 |
-
|
| 636 |
-
# for item in self.checkBoxes:
|
| 637 |
-
# feature = item[1]
|
| 638 |
-
# # If inidices of checkBoxes list and selected rows in table match...
|
| 639 |
-
# if item[2] in selected_indices:
|
| 640 |
-
# holder = (item[0],int(feature.location.start),int(feature.location.end)) # Tuple order: Feature chromosome/scaffold number, feature start, feature end
|
| 641 |
-
# ### If locus tag available, combine with gene name to create dict key
|
| 642 |
-
# if 'locus_tag' in feature.qualifiers:
|
| 643 |
-
# tag = feature.qualifiers['locus_tag'][0]
|
| 644 |
-
# key = tag + ": " + get_name(feature)
|
| 645 |
-
# else:
|
| 646 |
-
# key = get_name(feature)
|
| 647 |
-
# self.checked_info[key] = holder
|
| 648 |
-
# self.genlib_list.append((item[0],feature)) # Tuple order: Feature chromosome/scaffold number, SeqFeature object
|
| 649 |
-
# else:
|
| 650 |
-
# # If item was not selected in the table, go to the next item
|
| 651 |
-
# continue
|
| 652 |
-
|
| 653 |
-
# # now call transfer data
|
| 654 |
-
# self.progressBar.setValue(95)
|
| 655 |
-
# self.Results.transfer_data(full_org, self.organisms_to_files[full_org], [str(self.endoChoice.currentText())], os.getcwd(),
|
| 656 |
-
# self.checked_info, self.check_ntseq_info,inputtype="feature")
|
| 657 |
-
# self.Results.load_gene_viewer()
|
| 658 |
-
|
| 659 |
-
# self.progressBar.setValue(100)
|
| 660 |
-
# self.pushButton_ViewTargets.setEnabled(True)
|
| 661 |
-
# self.GenerateLibrary.setEnabled(True)
|
| 662 |
-
# except Exception as e:
|
| 663 |
-
# show_error("Error in collect_table_data_nonkegg() in main", e)
|
| 664 |
-
|
| 665 |
-
# def separate_line(self, input_string):
|
| 666 |
-
# try:
|
| 667 |
-
# export_array = []
|
| 668 |
-
# while True:
|
| 669 |
-
# index = input_string.find('\n')
|
| 670 |
-
# if index == -1:
|
| 671 |
-
# if len(input_string) == 0:
|
| 672 |
-
# return export_array
|
| 673 |
-
# else:
|
| 674 |
-
# export_array.append(input_string)
|
| 675 |
-
# return export_array
|
| 676 |
-
# export_array.append(input_string[:index])
|
| 677 |
-
# input_string = input_string[index + 1:]
|
| 678 |
-
# except Exception as e:
|
| 679 |
-
# show_error("Error in seperate_line() in main", e)
|
| 680 |
-
|
| 681 |
-
# def removeWhiteSpace(self, strng):
|
| 682 |
-
# try:
|
| 683 |
-
# while True:
|
| 684 |
-
# if len(strng) == 0 or (strng[0] != " " and strng[0] != "\n"):
|
| 685 |
-
# break
|
| 686 |
-
# strng = strng[1:]
|
| 687 |
-
# while True:
|
| 688 |
-
# if len(strng) == 0 or (strng[len(strng) - 1] != " " and strng[0] != "\n"):
|
| 689 |
-
# return strng
|
| 690 |
-
# strng = strng[:len(strng) - 1]
|
| 691 |
-
# except Exception as e:
|
| 692 |
-
# show_error("Error in removeWhiteSpace() in main", e)
|
| 693 |
-
|
| 694 |
-
# # Function to enable and disable the Annotation function if searching by position or sequence
|
| 695 |
-
# def toggle_annotation(self):
|
| 696 |
-
# try:
|
| 697 |
-
# if self.radioButton_Gene.isChecked():
|
| 698 |
-
# self.Step2.setEnabled(True)
|
| 699 |
-
# else:
|
| 700 |
-
# self.Step2.setEnabled(True)
|
| 701 |
-
# except Exception as e:
|
| 702 |
-
# show_error("Error in toggle_annotation() in main", e)
|
| 703 |
-
|
| 704 |
-
# def fill_annotation_dropdown(self):
|
| 705 |
-
# try:
|
| 706 |
-
# #recursive search for all GenBank files in casper db folder
|
| 707 |
-
# self.annotation_files.clear()
|
| 708 |
-
# annotation_files = glob.glob(GlobalSettings.CSPR_DB + "/**/*.gb*", recursive=True)
|
| 709 |
-
# if platform.system() == "Windows":
|
| 710 |
-
# for i in range(len(annotation_files)):
|
| 711 |
-
# annotation_files[i] = annotation_files[i].replace("/","\\")
|
| 712 |
-
# annotation_files[i] = annotation_files[i][annotation_files[i].rfind("\\") + 1:]
|
| 713 |
-
# else:
|
| 714 |
-
# for i in range(len(annotation_files)):
|
| 715 |
-
# annotation_files[i] = annotation_files[i].replace("\\","/")
|
| 716 |
-
# annotation_files[i] = annotation_files[i][annotation_files[i].rfind("/") + 1:]
|
| 717 |
-
|
| 718 |
-
# annotation_files.sort(key=str.lower)
|
| 719 |
-
# self.annotation_files.addItems(annotation_files)
|
| 720 |
-
# self.annotation_files.addItems(["None"])
|
| 721 |
-
# except Exception as e:
|
| 722 |
-
# show_error("Error in fill_annotation_dropdown() in main", e)
|
| 723 |
-
|
| 724 |
-
# def make_dictonary(self):
|
| 725 |
-
# try:
|
| 726 |
-
# url = "https://www.genome.jp/dbget-bin/get_linkdb?-t+genes+gn:" + self.TNumbers[
|
| 727 |
-
# self.Annotations_Organism.currentText()]
|
| 728 |
-
# source_code = requests.get(url, verify=False)
|
| 729 |
-
# plain_text = source_code.text
|
| 730 |
-
# buf = io.StringIO(plain_text)
|
| 731 |
-
|
| 732 |
-
# while True:
|
| 733 |
-
# line = buf.readline()
|
| 734 |
-
# if line[0] == "-":
|
| 735 |
-
# break
|
| 736 |
-
# while True:
|
| 737 |
-
# line = buf.readline()
|
| 738 |
-
# if line[1] != "a":
|
| 739 |
-
# return
|
| 740 |
-
# line = line[line.find(">") + 1:]
|
| 741 |
-
# seq = line[line.find(":") + 1:line.find("<")]
|
| 742 |
-
# line = line[line.find(">") + 1:]
|
| 743 |
-
|
| 744 |
-
# i = 0
|
| 745 |
-
# while True:
|
| 746 |
-
# if line[i] == " ":
|
| 747 |
-
# i = i + 1
|
| 748 |
-
# else:
|
| 749 |
-
# break
|
| 750 |
-
# key = line[i:line.find("\n") - 1]
|
| 751 |
-
# if key in self.gene_list:
|
| 752 |
-
# if seq not in self.gene_list[key]:
|
| 753 |
-
# self.gene_list[key].append(seq)
|
| 754 |
-
# else:
|
| 755 |
-
# self.gene_list[key] = [seq]
|
| 756 |
-
# z = 5
|
| 757 |
-
# except Exception as e:
|
| 758 |
-
# show_error("Error in make_dictionary() in main", e)
|
| 759 |
-
|
| 760 |
-
# def organism_finder(self, long_str):
|
| 761 |
-
# try:
|
| 762 |
-
# semi = long_str.find(";")
|
| 763 |
-
# index = 1
|
| 764 |
-
# while True:
|
| 765 |
-
# if long_str[semi - index] == " ":
|
| 766 |
-
# break
|
| 767 |
-
# index = index + 1
|
| 768 |
-
# return long_str[:semi - index]
|
| 769 |
-
# except Exception as e:
|
| 770 |
-
# show_error("Error in organism_finder() in main", e)
|
| 771 |
-
|
| 772 |
-
# # This method is for testing the execution of a button call to make sure the button is linked properly
|
| 773 |
-
# def testexe(self):
|
| 774 |
-
# try:
|
| 775 |
-
# msgBox = QtWidgets.QMessageBox()
|
| 776 |
-
# msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
|
| 777 |
-
# msgBox.setIcon(QtWidgets.QMessageBox.Icon.Question)
|
| 778 |
-
# msgBox.setWindowTitle("Extract!")
|
| 779 |
-
# msgBox.setText(
|
| 780 |
-
# "Are you sure you want to quit?")
|
| 781 |
-
# msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Yes)
|
| 782 |
-
# msgBox.addButton(QtWidgets.QMessageBox.StandardButton.No)
|
| 783 |
-
# msgBox.exec()
|
| 784 |
-
|
| 785 |
-
# if msgBox.result() == QtWidgets.QMessageBox.Yes:
|
| 786 |
-
# # print(self.orgChoice.currentText())
|
| 787 |
-
# sys.exit()
|
| 788 |
-
# else:
|
| 789 |
-
# pass
|
| 790 |
-
# except Exception as e:
|
| 791 |
-
# show_error("Error in testexe() in main", e)
|
| 792 |
-
|
| 793 |
-
# def getData(self):
|
| 794 |
-
# try:
|
| 795 |
-
# try:
|
| 796 |
-
# self.orgChoice.currentIndexChanged.disconnect()
|
| 797 |
-
# except Exception as e:
|
| 798 |
-
# pass
|
| 799 |
-
|
| 800 |
-
# self.orgChoice.clear()
|
| 801 |
-
# self.endoChoice.clear()
|
| 802 |
-
# mypath = os.getcwd()
|
| 803 |
-
# found = False
|
| 804 |
-
# self.dbpath = mypath
|
| 805 |
-
# onlyfiles = [str(f) for f in os.listdir(mypath) if os.path.isfile(os.path.join(mypath, f))]
|
| 806 |
-
# onlyfiles.sort(key=str.lower)
|
| 807 |
-
# self.organisms_to_files = {}
|
| 808 |
-
# self.organisms_to_endos = {}
|
| 809 |
-
# first = True
|
| 810 |
-
# for file in onlyfiles:
|
| 811 |
-
# if file.find('.cspr') != -1:
|
| 812 |
-
# if first == True:
|
| 813 |
-
# first = False
|
| 814 |
-
# found = True
|
| 815 |
-
# newname = file[0:-4]
|
| 816 |
-
# endo = newname[newname.rfind("_")+1:-1]
|
| 817 |
-
# hold = open(file, 'r')
|
| 818 |
-
# buf = (hold.readline())
|
| 819 |
-
# buf = str(buf)
|
| 820 |
-
# buf = buf.strip()
|
| 821 |
-
# species = buf.replace("GENOME: ",'')
|
| 822 |
-
|
| 823 |
-
# if species in self.organisms_to_files:
|
| 824 |
-
# self.organisms_to_files[species][endo] = [file, file.replace(".cspr", "_repeats.db")]
|
| 825 |
-
# else:
|
| 826 |
-
# self.organisms_to_files[species] = {}
|
| 827 |
-
# self.organisms_to_files[species][endo] = [file, file.replace(".cspr", "_repeats.db")]
|
| 828 |
-
|
| 829 |
-
# if species in self.organisms_to_endos:
|
| 830 |
-
# self.organisms_to_endos[species].append(endo)
|
| 831 |
-
# else:
|
| 832 |
-
# self.organisms_to_endos[species] = [endo]
|
| 833 |
-
# if self.orgChoice.findText(species) == -1:
|
| 834 |
-
# self.orgChoice.addItem(species)
|
| 835 |
-
|
| 836 |
-
# if found == False:
|
| 837 |
-
# return False
|
| 838 |
-
|
| 839 |
-
# self.endoChoice.clear()
|
| 840 |
-
# self.endoChoice.addItems(self.organisms_to_endos[str(self.orgChoice.currentText())])
|
| 841 |
-
# self.orgChoice.currentIndexChanged.connect(self.changeEndos)
|
| 842 |
-
# except Exception as e:
|
| 843 |
-
# show_error("Error in getData() in main.", e)
|
| 844 |
-
|
| 845 |
-
# def changeEndos(self):
|
| 846 |
-
# try:
|
| 847 |
-
# if self.orgChoice.currentText() != "Custom Input Sequences":
|
| 848 |
-
# self.Step2.setEnabled(True)
|
| 849 |
-
# self.endoChoice.setEnabled(True)
|
| 850 |
-
# self.radioButton_Gene.show()
|
| 851 |
-
# self.radioButton_Position.show()
|
| 852 |
-
# self.endoChoice.clear()
|
| 853 |
-
# self.endoChoice.addItems(self.organisms_to_endos[str(self.orgChoice.currentText())])
|
| 854 |
-
# else:
|
| 855 |
-
# self.Step2.setEnabled(False)
|
| 856 |
-
# self.endoChoice.clear()
|
| 857 |
-
# self.endoChoice.setEnabled(False)
|
| 858 |
-
# self.radioButton_Gene.hide()
|
| 859 |
-
# self.radioButton_Position.hide()
|
| 860 |
-
# except Exception as e:
|
| 861 |
-
# show_error("Error in changeEndos() in main", e)
|
| 862 |
-
|
| 863 |
-
# def change_directory(self):
|
| 864 |
-
# try:
|
| 865 |
-
# mydir = QtWidgets.QFileDialog.getExistingDirectory(
|
| 866 |
-
# None, "Open a folder...", self.dbpath, QtWidgets.QFileDialog.Option.ShowDirsOnly)
|
| 867 |
-
|
| 868 |
-
# if not os.path.isdir(mydir):
|
| 869 |
-
# show_message(
|
| 870 |
-
# fontSize=12,
|
| 871 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 872 |
-
# title="Not a directory",
|
| 873 |
-
# message="The directory you selected does not exist."
|
| 874 |
-
# )
|
| 875 |
-
# return
|
| 876 |
-
|
| 877 |
-
# if not any(file.endswith(".cspr") for file in os.listdir(mydir)):
|
| 878 |
-
# show_message(
|
| 879 |
-
# fontSize=12,
|
| 880 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 881 |
-
# title="Directory is invalid!",
|
| 882 |
-
# message="You must select a directory with CSPR Files!"
|
| 883 |
-
# )
|
| 884 |
-
# return
|
| 885 |
-
|
| 886 |
-
# os.chdir(mydir)
|
| 887 |
-
# mydir = mydir.replace("/", "\\") if platform.system() == "Windows" else mydir
|
| 888 |
-
# GlobalSettings.CSPR_DB = mydir
|
| 889 |
-
|
| 890 |
-
# GlobalSettings.MTWin.directory = mydir
|
| 891 |
-
# GlobalSettings.MTWin.get_data()
|
| 892 |
-
# GlobalSettings.pop_Analysis.get_data()
|
| 893 |
-
# self.getData()
|
| 894 |
-
# self.fill_annotation_dropdown()
|
| 895 |
-
# except Exception as e:
|
| 896 |
-
# show_error("Error in change_directory() in main.", e)
|
| 897 |
-
|
| 898 |
-
# def changeto_multitargeting(self):
|
| 899 |
-
# try:
|
| 900 |
-
# os.chdir(os.getcwd())
|
| 901 |
-
# if GlobalSettings.MTWin.first_show == True:
|
| 902 |
-
# GlobalSettings.MTWin.show()
|
| 903 |
-
# GlobalSettings.MTWin.first_show = False
|
| 904 |
-
# else:
|
| 905 |
-
# GlobalSettings.MTWin.show()
|
| 906 |
-
# GlobalSettings.mainWindow.hide()
|
| 907 |
-
|
| 908 |
-
# except Exception as e:
|
| 909 |
-
# show_error("Error in changeto_multitargeting() in main.", e)
|
| 910 |
-
|
| 911 |
-
# #change to population analysis window
|
| 912 |
-
# def changeto_population_Analysis(self):
|
| 913 |
-
# try:
|
| 914 |
-
# GlobalSettings.pop_Analysis.launch()
|
| 915 |
-
# if GlobalSettings.pop_Analysis.first_show == True:
|
| 916 |
-
# center_ui(GlobalSettings.pop_Analysis)
|
| 917 |
-
# GlobalSettings.pop_Analysis.first_show = False
|
| 918 |
-
# GlobalSettings.pop_Analysis.show()
|
| 919 |
-
# GlobalSettings.mainWindow.hide()
|
| 920 |
-
# except Exception as e:
|
| 921 |
-
# show_error("Error in changeto_population_Analysis() in main.", e)
|
| 922 |
-
|
| 923 |
-
# def annotation_information(self):
|
| 924 |
-
# try:
|
| 925 |
-
# show_message(
|
| 926 |
-
# fontSize=12,
|
| 927 |
-
# icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 928 |
-
# title="Annotation Information",
|
| 929 |
-
# message="Annotation files are used for searching for spacers on a gene/locus basis and can be selected here using either " \
|
| 930 |
-
# "NCBI databases or a local file."
|
| 931 |
-
# )
|
| 932 |
-
# except Exception as e:
|
| 933 |
-
# show_error("Error in annotation_information() in main.", e)
|
| 934 |
-
|
| 935 |
-
# @QtCore.pyqtSlot()
|
| 936 |
-
# def view_results(self):
|
| 937 |
-
# try:
|
| 938 |
-
# #center results window on current screen
|
| 939 |
-
# if self.Results.first_show == True:
|
| 940 |
-
# self.Results.first_show = False
|
| 941 |
-
# self.Results.centerUI()
|
| 942 |
-
|
| 943 |
-
# self.Results.show()
|
| 944 |
-
# self.hide()
|
| 945 |
-
# except Exception as e:
|
| 946 |
-
# show_error("Error in view_results() in main", e)
|
| 947 |
-
|
| 948 |
-
# def closeFunction(self):
|
| 949 |
-
# try:
|
| 950 |
-
# # Attempt to close the NCBI window if it exists
|
| 951 |
-
# try:
|
| 952 |
-
# self.ncbi.close()
|
| 953 |
-
# except AttributeError:
|
| 954 |
-
# print("No NCBI window to close.")
|
| 955 |
-
|
| 956 |
-
# self.myClosingWindow.get_files()
|
| 957 |
-
# center_ui(self.myClosingWindow)
|
| 958 |
-
# self.myClosingWindow.show()
|
| 959 |
-
# except Exception as e:
|
| 960 |
-
# show_error("Error in closeFunction() in main", e)
|
| 961 |
-
|
| 962 |
-
# def close_app(self):
|
| 963 |
-
# try:
|
| 964 |
-
# # Attempt to close the NCBI window if it exists
|
| 965 |
-
# try:
|
| 966 |
-
# self.ncbi.close()
|
| 967 |
-
# except Exception as e:
|
| 968 |
-
# print("No NCBI window to close.")
|
| 969 |
-
|
| 970 |
-
# self.closeFunction()
|
| 971 |
-
# self.close()
|
| 972 |
-
# except Exception as e:
|
| 973 |
-
# show_error("Error in close_app() in main", e)
|
| 974 |
-
|
| 975 |
-
# def load_dropdown_data(self):
|
| 976 |
-
# """Fill in organism/endo/annotation dropdown information."""
|
| 977 |
-
# try:
|
| 978 |
-
# self.getData()
|
| 979 |
-
# self.fill_annotation_dropdown()
|
| 980 |
-
# # self.logger.debug("Successfully loaded organism/endo/annotation drop down information in Main.")
|
| 981 |
-
# except Exception as e:
|
| 982 |
-
# show_error("Error in load_dropdown_data() in Main", e)
|
| 983 |
-
|
| 984 |
-
# # Call methods for other windows if needed
|
| 985 |
-
# # self.load_mt_data()
|
| 986 |
-
# # self.load_pop_analysis_data()
|
| 987 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from PyQt6.QtWidgets import QTabWidget, QTabBar, QToolButton, QWidget
|
| 2 |
+
from PyQt6.QtCore import pyqtSignal, QSize, Qt
|
| 3 |
+
from PyQt6.QtGui import QCursor
|
| 4 |
+
from PyQt6 import QtWidgets
|
| 5 |
+
import logging
|
| 6 |
+
|
| 7 |
+
class CloseableTabWidget(QTabWidget):
    """Tab widget with a permanent first tab and custom close buttons on the rest.

    The tab at index 0 is treated as the home tab: it gets no close button
    and is never closed or moved by this class. Every other tab receives a
    ``QToolButton`` on the right side of its tab that closes it.
    """

    # Emitted with the page widget once its tab has been removed.
    tab_closed = pyqtSignal(QWidget)

    def __init__(self, parent=None):
        """Create the widget and wire up close/move handling."""
        super().__init__(parent)
        # Qt's built-in close buttons stay off; custom buttons are installed
        # per tab in addTab() so that index 0 can be left without one.
        self.setTabsClosable(False)
        self.tabCloseRequested.connect(self.closeTab)
        # Maps "<label>_<id(widget)>" -> {'widget', 'label', 'close_button'}.
        self._tabs = {}
        self.tabBar().tabMoved.connect(self._handle_tab_moved)
        self.logger = logging.getLogger(__name__)

    def closeTab(self, index):
        """Close the tab at ``index`` and release what belongs to it.

        Nothing happens when ``index`` is 0 or when only one tab is left.
        If the page exposes ``controller.model.cleanup`` it is called before
        the tab is removed. Afterwards ``tab_closed`` is emitted with the
        page and the page is scheduled for deletion.

        Raises:
            Exception: anything raised while closing is logged and re-raised.
        """
        self.logger.debug("Attempting to close tab at index %s", index)

        if self.count() <= 1 or index == 0:
            self.logger.debug("Tab closure conditions not met")
            return

        widget = self.widget(index)
        # Compare with None explicitly so that a page's own truth value can
        # never be mistaken for "no page at this index".
        if widget is None:
            self.logger.warning("No widget found at index %s", index)
            return

        try:
            tab_text = self.tabText(index)

            # Cleanup controller if exists
            controller = getattr(widget, 'controller', None)
            if controller and hasattr(controller, 'model') and hasattr(controller.model, 'cleanup'):
                controller.model.cleanup()

            # Entries are keyed by label *and* widget id, so look them up by
            # widget identity; deleting by the bare tab text never matched.
            self._forget_tab(widget)

            self.removeTab(index)
            self.tab_closed.emit(widget)
            widget.deleteLater()
            self._update_all_tabs()

            self.logger.debug("Successfully closed tab '%s'", tab_text)
        except Exception as e:
            self.logger.error(f"Failed to close tab: {e}", exc_info=True)
            raise

    def _forget_tab(self, widget):
        """Remove every bookkeeping entry in ``self._tabs`` that refers to ``widget``."""
        stale_keys = [key for key, info in self._tabs.items() if info.get('widget') is widget]
        for key in stale_keys:
            del self._tabs[key]

    def addTab(self, widget, label):
        """Append ``widget`` as a new tab titled ``label``.

        Tabs that land on any index other than 0 get a close button.

        Returns:
            int: the index of the new tab, or -1 when ``widget`` is ``None``,
            ``label`` is empty, or adding the tab raised an exception.
        """
        try:
            if widget is None or not label:
                self.logger.warning("addTab called without a widget or label; tab not added")
                return -1

            index = super().addTab(widget, label)

            # Register only after the tab really exists so that a failed
            # insert cannot leave an orphaned entry behind.
            tab_id = f"{label}_{id(widget)}"
            self._tabs[tab_id] = {
                'widget': widget,
                'label': label,
                'close_button': None
            }

            if index != 0:
                # Create and setup close button
                close_button = self._create_close_button(index, label)
                self._tabs[tab_id]['close_button'] = close_button
                self.tabBar().setTabButton(index, QTabBar.ButtonPosition.RightSide, close_button)

            return index
        except Exception as e:
            self.logger.error(f"Error adding tab: {e}")
            return -1

    def _create_close_button(self, index, label):
        """Build the close button for the tab currently at ``index``.

        The button's click handler is bound to ``index`` at creation time;
        ``_update_all_tabs`` rebinds it whenever tab positions change.
        """
        close_button = QToolButton(self.tabBar())
        close_button.setObjectName(f"close_button_{label}")
        close_icon = self.style().standardIcon(QtWidgets.QStyle.StandardPixmap.SP_TitleBarCloseButton)
        close_button.setIcon(close_icon)
        close_button.setIconSize(QSize(16, 16))
        close_button.setAutoRaise(True)
        close_button.setStyleSheet("""
            QToolButton {
                border: none;
                padding: 0px;
            }
            QToolButton:hover {
                background: #c42b1c;
            }
        """)
        close_button.setCursor(QCursor(Qt.CursorShape.PointingHandCursor))
        close_button.setFixedSize(18, 18)
        # idx is bound as a default argument so each button keeps its own index.
        close_button.clicked.connect(lambda checked, idx=index: self.safely_close_tab(idx))
        return close_button

    def safely_close_tab(self, index):
        """Close the tab at ``index`` if it is in range and not the home tab.

        Exceptions raised by ``closeTab`` are logged here and not propagated.
        """
        try:
            if 0 <= index < self.count():
                current_widget = self.widget(index)
                if current_widget is not None and index != 0:
                    self.closeTab(index)
        except Exception as e:
            self.logger.error(f"Error in safely_close_tab: {e}")

    def _handle_tab_moved(self, from_index: int, to_index: int):
        """Refresh close-button bindings after the tab bar reports a move."""
        try:
            self._update_all_tabs()
        except Exception as e:
            self.logger.error(f"Error handling tab movement: {e}")

    def _update_all_tabs(self):
        """Make sure every tab after index 0 has a close button bound to its current index."""
        try:
            for i in range(1, self.count()):  # Skip index 0 (home tab)
                widget = self.widget(i)
                if widget is None:
                    continue

                label = self.tabText(i)
                tab_id = f"{label}_{id(widget)}"
                entry = self._tabs.get(tab_id)

                if entry is None or not entry.get('close_button'):
                    # No usable button recorded for this tab: create one.
                    close_button = self._create_close_button(i, label)
                    self._tabs[tab_id] = {
                        'widget': widget,
                        'label': label,
                        'close_button': close_button
                    }
                    self.tabBar().setTabButton(i, QTabBar.ButtonPosition.RightSide, close_button)
                else:
                    # Rebind the existing button to the tab's current index.
                    close_button = entry['close_button']
                    close_button.clicked.disconnect()
                    close_button.clicked.connect(lambda checked, idx=i: self.safely_close_tab(idx))
        except Exception as e:
            self.logger.error(f"Error updating tabs: {e}")

    def moveTab(self, from_index, to_index):
        """Move a tab between two positions, never touching the home tab.

        The request is ignored unless both indices are valid and non-zero.
        Errors are logged and not propagated.
        """
        try:
            count = self.count()
            if 0 < from_index < count and 0 < to_index < count:
                # moveTab is provided by the tab bar, not by QTabWidget, so
                # the previous super().moveTab() call could only fail.
                self.tabBar().moveTab(from_index, to_index)
                self._update_all_tabs()
        except Exception as e:
            self.logger.error(f"Error moving tab: {e}")
|
|
@@ -1,95 +1,148 @@
|
|
| 1 |
from PyQt6 import QtWidgets
|
| 2 |
-
from PyQt6.QtWidgets import QWidget, QVBoxLayout, QTableWidget, QTableWidgetItem,
|
|
|
|
| 3 |
from PyQt6 import uic
|
| 4 |
from PyQt6.QtCore import Qt, QTimer
|
|
|
|
| 5 |
|
| 6 |
class FindTargetsView(QtWidgets.QMainWindow):
|
| 7 |
def __init__(self, global_settings):
|
| 8 |
super().__init__()
|
| 9 |
self.global_settings = global_settings
|
| 10 |
self._init_ui()
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
def _init_ui(self):
|
| 13 |
uic.loadUi(self.global_settings.get_ui_dir_path() + '/find_targets.ui', self)
|
| 14 |
self.results_table = self.findChild(QTableWidget, 'tblTargets')
|
|
|
|
|
|
|
| 15 |
self.results_table.setSelectionBehavior(QTableWidget.SelectionBehavior.SelectRows)
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
#
|
| 18 |
-
self.results_table.
|
| 19 |
-
self.results_table.
|
| 20 |
-
self.results_table.horizontalHeader().setStretchLastSection(True)
|
| 21 |
|
| 22 |
-
#
|
| 23 |
-
self.results_table.
|
| 24 |
-
self.results_table.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"Feature Type", "Chromosome/Scaffold #", "Feature ID/Locus Tag",
|
| 26 |
-
"Feature Name", "Feature Description"
|
| 27 |
-
]
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
#
|
| 32 |
-
self.
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
def
|
| 35 |
-
"""
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
def display_results(self, results):
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
self.results_table.setUpdatesEnabled(False)
|
| 45 |
self.results_table.setSortingEnabled(False)
|
|
|
|
| 46 |
|
| 47 |
-
# Set row count
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
self.results_table.setItem(row, 4, self._get_table_item(result['feature_description']))
|
| 57 |
-
self.results_table.setItem(row, 5, self._get_table_item(result['location']))
|
| 58 |
-
self.results_table.setItem(row, 6, self._get_table_item(result['strand']))
|
| 59 |
-
|
| 60 |
-
# Re-enable updates and adjust columns
|
| 61 |
-
QTimer.singleShot(0, self._finish_table_update)
|
| 62 |
-
|
| 63 |
-
def _finish_table_update(self):
|
| 64 |
-
"""Complete table update in the next event loop iteration"""
|
| 65 |
-
self.results_table.resizeColumnsToContents()
|
| 66 |
self.results_table.setUpdatesEnabled(True)
|
| 67 |
self.results_table.setSortingEnabled(True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
def get_selected_targets(self):
|
| 70 |
selected_rows = set(index.row() for index in self.results_table.selectedIndexes())
|
| 71 |
selected_targets = []
|
| 72 |
|
| 73 |
-
# Get column indices once
|
| 74 |
-
columns = {
|
| 75 |
-
'feature_type': 0,
|
| 76 |
-
'chromosome': 1,
|
| 77 |
-
'feature_id': 2,
|
| 78 |
-
'feature_name': 3,
|
| 79 |
-
'feature_description': 4,
|
| 80 |
-
'location': 5,
|
| 81 |
-
'strand': 6
|
| 82 |
-
}
|
| 83 |
-
|
| 84 |
for row in selected_rows:
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
'feature_id': self.results_table.item(row, columns['feature_id']).text(),
|
| 89 |
-
'feature_name': self.results_table.item(row, columns['feature_name']).text(),
|
| 90 |
-
'feature_description': self.results_table.item(row, columns['feature_description']).text(),
|
| 91 |
-
'location': self.results_table.item(row, columns['location']).text(),
|
| 92 |
-
'strand': self.results_table.item(row, columns['strand']).text()
|
| 93 |
-
}
|
| 94 |
-
selected_targets.append(target)
|
| 95 |
return selected_targets
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from PyQt6 import QtWidgets
|
| 2 |
+
from PyQt6.QtWidgets import (QWidget, QVBoxLayout, QTableWidget, QTableWidgetItem,
|
| 3 |
+
QPushButton, QHBoxLayout, QLabel, QAbstractItemView)
|
| 4 |
from PyQt6 import uic
|
| 5 |
from PyQt6.QtCore import Qt, QTimer
|
| 6 |
+
import time
|
| 7 |
|
| 8 |
class FindTargetsView(QtWidgets.QMainWindow):
|
| 9 |
def __init__(self, global_settings):
    """Build the window from its .ui file and reset the paging state.

    Args:
        global_settings: settings object; stored on the instance and used
            by ``_init_ui`` to locate the UI directory.
    """
    super().__init__()
    self.global_settings = global_settings
    self._init_ui()

    # Paging state for the results table.
    self.batch_size = 100  # rows loaded per batch
    self._all_results, self._loaded_rows = [], 0  # full result list / rows loaded so far
|
| 16 |
|
| 17 |
def _init_ui(self):
|
| 18 |
uic.loadUi(self.global_settings.get_ui_dir_path() + '/find_targets.ui', self)
|
| 19 |
self.results_table = self.findChild(QTableWidget, 'tblTargets')
|
| 20 |
+
|
| 21 |
+
# Optimize table settings for large datasets
|
| 22 |
self.results_table.setSelectionBehavior(QTableWidget.SelectionBehavior.SelectRows)
|
| 23 |
+
self.results_table.setShowGrid(False)
|
| 24 |
+
self.results_table.setAlternatingRowColors(True)
|
| 25 |
|
| 26 |
+
# Enable virtual scrolling mode
|
| 27 |
+
self.results_table.setVerticalScrollMode(QTableWidget.ScrollMode.ScrollPerPixel)
|
| 28 |
+
self.results_table.setHorizontalScrollMode(QTableWidget.ScrollMode.ScrollPerPixel)
|
|
|
|
| 29 |
|
| 30 |
+
# Optimize viewport updates
|
| 31 |
+
self.results_table.setVerticalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAlwaysOn)
|
| 32 |
+
self.results_table.viewport().setProperty("cursor", Qt.CursorShape.ArrowCursor)
|
| 33 |
+
|
| 34 |
+
# Set table properties for better performance
|
| 35 |
+
self.results_table.setColumnCount(5) # Reduced from 7 to 5 columns
|
| 36 |
+
headers = [
|
| 37 |
"Feature Type", "Chromosome/Scaffold #", "Feature ID/Locus Tag",
|
| 38 |
+
"Feature Name", "Feature Description"
|
| 39 |
+
]
|
| 40 |
+
self.results_table.setHorizontalHeaderLabels(headers)
|
| 41 |
+
|
| 42 |
+
# Set optimized column widths
|
| 43 |
+
column_widths = [100, 150, 150, 150, 300] # Adjusted widths
|
| 44 |
+
for i, width in enumerate(column_widths):
|
| 45 |
+
self.results_table.setColumnWidth(i, width)
|
| 46 |
+
|
| 47 |
+
self.results_table.horizontalHeader().setStretchLastSection(True)
|
| 48 |
|
| 49 |
+
# Connect scroll events for virtual scrolling
|
| 50 |
+
self.results_table.verticalScrollBar().valueChanged.connect(self._handle_scroll)
|
| 51 |
+
|
| 52 |
+
self.push_button_view_targets = self.findChild(QPushButton, 'pbtnViewTargets')
|
| 53 |
|
| 54 |
+
def _create_table_item(self, text):
|
| 55 |
+
"""Optimized item creation"""
|
| 56 |
+
item = QTableWidgetItem(str(text))
|
| 57 |
+
item.setFlags(item.flags() & ~Qt.ItemFlag.ItemIsEditable)
|
| 58 |
+
return item
|
| 59 |
+
|
| 60 |
+
def _create_row_items(self, result):
|
| 61 |
+
"""Create all items for a row at once"""
|
| 62 |
+
return [
|
| 63 |
+
self._create_table_item(result['feature_type']),
|
| 64 |
+
self._create_table_item(str(result['chromosome'])),
|
| 65 |
+
self._create_table_item(result['feature_id']),
|
| 66 |
+
self._create_table_item(result['feature_name']),
|
| 67 |
+
self._create_table_item(result['feature_description'])
|
| 68 |
+
]
|
| 69 |
|
| 70 |
def display_results(self, results):
|
| 71 |
+
start_time = time.time()
|
| 72 |
+
|
| 73 |
+
# Store all results and reset loaded count
|
| 74 |
+
self._all_results = results
|
| 75 |
+
self._loaded_rows = 0
|
| 76 |
+
|
| 77 |
+
# Disable visual updates
|
| 78 |
self.results_table.setUpdatesEnabled(False)
|
| 79 |
self.results_table.setSortingEnabled(False)
|
| 80 |
+
self.results_table.setVisible(False)
|
| 81 |
|
| 82 |
+
# Set total row count
|
| 83 |
+
total_rows = len(results)
|
| 84 |
+
self.results_table.setRowCount(total_rows)
|
| 85 |
+
|
| 86 |
+
# Load initial batch
|
| 87 |
+
self._load_batch(0, min(self.batch_size, total_rows))
|
| 88 |
+
|
| 89 |
+
# Re-enable table and updates
|
| 90 |
+
self.results_table.setVisible(True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
self.results_table.setUpdatesEnabled(True)
|
| 92 |
self.results_table.setSortingEnabled(True)
|
| 93 |
+
|
| 94 |
+
total_time = time.time() - start_time
|
| 95 |
+
self.global_settings.logger.debug(f"Initial display time: {total_time:.2f} seconds")
|
| 96 |
+
|
| 97 |
+
def _load_batch(self, start_idx, end_idx):
|
| 98 |
+
"""Load a batch of rows efficiently"""
|
| 99 |
+
if start_idx >= len(self._all_results) or start_idx >= end_idx:
|
| 100 |
+
return
|
| 101 |
+
|
| 102 |
+
batch_items = []
|
| 103 |
+
for row in range(start_idx, end_idx):
|
| 104 |
+
if row >= len(self._all_results):
|
| 105 |
+
break
|
| 106 |
+
row_items = self._create_row_items(self._all_results[row])
|
| 107 |
+
batch_items.append((row, row_items))
|
| 108 |
+
|
| 109 |
+
# Batch set items
|
| 110 |
+
for row, items in batch_items:
|
| 111 |
+
for col, item in enumerate(items):
|
| 112 |
+
self.results_table.setItem(row, col, item)
|
| 113 |
+
|
| 114 |
+
self._loaded_rows = end_idx
|
| 115 |
+
|
| 116 |
+
def _handle_scroll(self, value):
|
| 117 |
+
"""Handle scroll events for virtual scrolling"""
|
| 118 |
+
viewport_height = self.results_table.viewport().height()
|
| 119 |
+
row_height = self.results_table.rowHeight(0)
|
| 120 |
+
visible_rows = viewport_height // row_height
|
| 121 |
+
|
| 122 |
+
# Calculate which rows should be visible
|
| 123 |
+
scroll_position = value
|
| 124 |
+
start_row = max(0, scroll_position - visible_rows)
|
| 125 |
+
end_row = min(len(self._all_results), scroll_position + visible_rows * 2)
|
| 126 |
+
|
| 127 |
+
# Load more rows if needed
|
| 128 |
+
if end_row > self._loaded_rows:
|
| 129 |
+
self._load_batch(self._loaded_rows, end_row)
|
| 130 |
|
| 131 |
def get_selected_targets(self):
|
| 132 |
selected_rows = set(index.row() for index in self.results_table.selectedIndexes())
|
| 133 |
selected_targets = []
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
for row in selected_rows:
|
| 136 |
+
if row < len(self._all_results):
|
| 137 |
+
selected_targets.append(self._all_results[row])
|
| 138 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
return selected_targets
|
| 140 |
+
|
| 141 |
+
def clear_results(self):
|
| 142 |
+
"""Clear all results from the table"""
|
| 143 |
+
self.results_table.setUpdatesEnabled(False)
|
| 144 |
+
self.results_table.clearContents()
|
| 145 |
+
self.results_table.setRowCount(0)
|
| 146 |
+
self._all_results = []
|
| 147 |
+
self._loaded_rows = 0
|
| 148 |
+
self.results_table.setUpdatesEnabled(True)
|
|
@@ -7,6 +7,7 @@ class HomeWindowView(QWidget):
|
|
| 7 |
def __init__(self, global_settings):
|
| 8 |
super().__init__()
|
| 9 |
self.global_settings = global_settings
|
|
|
|
| 10 |
self._init_ui()
|
| 11 |
|
| 12 |
def _init_ui(self) -> None:
|
|
@@ -96,9 +97,9 @@ class HomeWindowView(QWidget):
|
|
| 96 |
self.combo_box_organism.clear()
|
| 97 |
self.combo_box_organism.addItems(organisms)
|
| 98 |
|
| 99 |
-
def update_combo_box_annotation_files(self, annotation_files: list) -> None:
|
| 100 |
-
|
| 101 |
-
|
| 102 |
|
| 103 |
def set_progress_bar(self, value: int) -> None:
|
| 104 |
self.progress_bar_find_targets.setValue(value)
|
|
@@ -126,4 +127,24 @@ class HomeWindowView(QWidget):
|
|
| 126 |
return "feature" # Default to feature if somehow none are selected
|
| 127 |
|
| 128 |
def get_annotation_file(self) -> str:
|
| 129 |
-
return self.combo_box_local_annotation_files.currentText()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
def __init__(self, global_settings):
    """Store the settings object and its logger, then build the UI."""
    super().__init__()
    self.global_settings = global_settings
    # Shortcut to the logger carried by the settings object.
    self.logger = global_settings.logger
    self._init_ui()
|
| 12 |
|
| 13 |
def _init_ui(self) -> None:
|
|
|
|
| 97 |
self.combo_box_organism.clear()
|
| 98 |
self.combo_box_organism.addItems(organisms)
|
| 99 |
|
| 100 |
+
# def update_combo_box_annotation_files(self, annotation_files: list) -> None:
|
| 101 |
+
# self.combo_box_local_annotation_files.clear()
|
| 102 |
+
# self.combo_box_local_annotation_files.addItems(annotation_files)
|
| 103 |
|
| 104 |
def set_progress_bar(self, value: int) -> None:
|
| 105 |
self.progress_bar_find_targets.setValue(value)
|
|
|
|
| 127 |
return "feature" # Default to feature if somehow none are selected
|
| 128 |
|
| 129 |
def get_annotation_file(self) -> str:
    """Return the text currently shown in the local annotation files combo box."""
    combo = self.combo_box_local_annotation_files
    return combo.currentText()
|
| 131 |
+
|
| 132 |
+
def update_combo_box_annotation_files(self, files):
    """Repopulate the local annotation files combo box, leaving out .index files.

    The combo box is cleared first. When at least one file remains after
    filtering, the files are added and the first one is selected. Any
    exception is logged as an error and not propagated.
    """
    try:
        combo = self.combo_box_local_annotation_files
        combo.clear()

        annotation_files = [name for name in files if not name.endswith('.index')]
        if not annotation_files:
            self.logger.debug("No local annotation files found")
            return

        combo.addItems(annotation_files)
        combo.setCurrentIndex(0)
        self.logger.debug(f"Added {len(annotation_files)} local annotation files to combo box")

    except Exception as e:
        self.logger.error(f"Error updating local annotation files: {str(e)}")
|
|
@@ -1,238 +1,68 @@
|
|
| 1 |
from PyQt6.QtWidgets import (
|
| 2 |
-
QMainWindow, QPushButton,
|
| 3 |
-
|
| 4 |
-
QHBoxLayout, QLabel, QFrame, QTabWidget, QToolButton, QTabBar
|
| 5 |
)
|
| 6 |
-
from PyQt6.QtGui import QIcon, QAction
|
| 7 |
-
from PyQt6.QtCore import Qt
|
| 8 |
from PyQt6 import uic, QtWidgets, QtCore, QtGui
|
| 9 |
-
from utils.ui import
|
|
|
|
| 10 |
import os
|
| 11 |
from typing import Optional
|
| 12 |
-
from functools import partial
|
| 13 |
import qdarktheme
|
|
|
|
| 14 |
|
| 15 |
-
class
|
| 16 |
-
tab_closed = pyqtSignal(QWidget)
|
| 17 |
-
|
| 18 |
-
def __init__(self, parent=None):
|
| 19 |
-
super().__init__(parent)
|
| 20 |
-
self.setTabsClosable(False)
|
| 21 |
-
self.tabCloseRequested.connect(self.closeTab)
|
| 22 |
-
self._tabs = {} # Dictionary to keep track of tab widgets
|
| 23 |
-
self.tabBar().tabMoved.connect(self._handle_tab_moved)
|
| 24 |
-
|
| 25 |
-
def closeTab(self, index):
|
| 26 |
-
try:
|
| 27 |
-
if self.count() > 1 and index != 0:
|
| 28 |
-
widget = self.widget(index)
|
| 29 |
-
if widget:
|
| 30 |
-
# Get tab text before removal
|
| 31 |
-
tab_text = self.tabText(index)
|
| 32 |
-
|
| 33 |
-
# Clean up the controller if it exists
|
| 34 |
-
controller = getattr(widget, 'controller', None)
|
| 35 |
-
if controller and hasattr(controller, 'model') and hasattr(controller.model, 'cleanup'):
|
| 36 |
-
controller.model.cleanup()
|
| 37 |
-
|
| 38 |
-
# Remove from tracking dictionary
|
| 39 |
-
if tab_text in self._tabs:
|
| 40 |
-
del self._tabs[tab_text]
|
| 41 |
-
|
| 42 |
-
# Remove the tab
|
| 43 |
-
self.removeTab(index)
|
| 44 |
-
|
| 45 |
-
# Emit signal before deletion
|
| 46 |
-
self.tab_closed.emit(widget)
|
| 47 |
-
|
| 48 |
-
# Schedule widget for deletion
|
| 49 |
-
widget.deleteLater()
|
| 50 |
-
|
| 51 |
-
# Update all remaining tabs
|
| 52 |
-
self._update_all_tabs()
|
| 53 |
-
except Exception as e:
|
| 54 |
-
print(f"Error closing tab: {e}")
|
| 55 |
-
|
| 56 |
-
def addTab(self, widget, label):
|
| 57 |
-
try:
|
| 58 |
-
if widget and label:
|
| 59 |
-
# Store widget reference with unique identifier
|
| 60 |
-
tab_id = f"{label}_{id(widget)}"
|
| 61 |
-
self._tabs[tab_id] = {
|
| 62 |
-
'widget': widget,
|
| 63 |
-
'label': label,
|
| 64 |
-
'close_button': None
|
| 65 |
-
}
|
| 66 |
-
|
| 67 |
-
# Add the tab
|
| 68 |
-
index = super().addTab(widget, label)
|
| 69 |
-
|
| 70 |
-
if index != 0:
|
| 71 |
-
# Create and setup close button
|
| 72 |
-
close_button = self._create_close_button(index, label)
|
| 73 |
-
self._tabs[tab_id]['close_button'] = close_button
|
| 74 |
-
self.tabBar().setTabButton(index, QTabBar.ButtonPosition.RightSide, close_button)
|
| 75 |
-
|
| 76 |
-
return index
|
| 77 |
-
except Exception as e:
|
| 78 |
-
print(f"Error adding tab: {e}")
|
| 79 |
-
return -1
|
| 80 |
-
|
| 81 |
-
def _create_close_button(self, index, label):
|
| 82 |
-
"""Create a new close button for a tab"""
|
| 83 |
-
close_button = QToolButton(self.tabBar())
|
| 84 |
-
close_button.setObjectName(f"close_button_{label}")
|
| 85 |
-
close_icon = self.style().standardIcon(QtWidgets.QStyle.StandardPixmap.SP_TitleBarCloseButton)
|
| 86 |
-
close_button.setIcon(close_icon)
|
| 87 |
-
close_button.setIconSize(QSize(16, 16))
|
| 88 |
-
close_button.setAutoRaise(True)
|
| 89 |
-
close_button.setStyleSheet("""
|
| 90 |
-
QToolButton {
|
| 91 |
-
border: none;
|
| 92 |
-
padding: 0px;
|
| 93 |
-
}
|
| 94 |
-
QToolButton:hover {
|
| 95 |
-
background: #c42b1c;
|
| 96 |
-
}
|
| 97 |
-
""")
|
| 98 |
-
close_button.setCursor(QCursor(Qt.CursorShape.PointingHandCursor))
|
| 99 |
-
close_button.setFixedSize(18, 18)
|
| 100 |
-
close_button.clicked.connect(lambda checked, idx=index: self.safely_close_tab(idx))
|
| 101 |
-
return close_button
|
| 102 |
-
|
| 103 |
-
def safely_close_tab(self, index):
|
| 104 |
-
"""Safely handle tab closure with error checking"""
|
| 105 |
-
try:
|
| 106 |
-
if 0 <= index < self.count():
|
| 107 |
-
current_widget = self.widget(index)
|
| 108 |
-
if current_widget and index != 0:
|
| 109 |
-
self.closeTab(index)
|
| 110 |
-
except Exception as e:
|
| 111 |
-
print(f"Error in safely_close_tab: {e}")
|
| 112 |
-
|
| 113 |
-
def _handle_tab_moved(self, from_index: int, to_index: int):
|
| 114 |
-
"""Handle tab movement and update close buttons"""
|
| 115 |
-
try:
|
| 116 |
-
self._update_all_tabs()
|
| 117 |
-
except Exception as e:
|
| 118 |
-
print(f"Error handling tab movement: {e}")
|
| 119 |
-
|
| 120 |
-
def _update_all_tabs(self):
|
| 121 |
-
"""Update all tabs and their close buttons"""
|
| 122 |
-
try:
|
| 123 |
-
for i in range(1, self.count()): # Skip index 0 (home tab)
|
| 124 |
-
widget = self.widget(i)
|
| 125 |
-
if widget:
|
| 126 |
-
label = self.tabText(i)
|
| 127 |
-
tab_id = f"{label}_{id(widget)}"
|
| 128 |
-
|
| 129 |
-
# Create new close button if needed
|
| 130 |
-
if tab_id not in self._tabs or not self._tabs[tab_id].get('close_button'):
|
| 131 |
-
close_button = self._create_close_button(i, label)
|
| 132 |
-
self._tabs[tab_id] = {
|
| 133 |
-
'widget': widget,
|
| 134 |
-
'label': label,
|
| 135 |
-
'close_button': close_button
|
| 136 |
-
}
|
| 137 |
-
self.tabBar().setTabButton(i, QTabBar.ButtonPosition.RightSide, close_button)
|
| 138 |
-
else:
|
| 139 |
-
# Update existing close button's click connection
|
| 140 |
-
close_button = self._tabs[tab_id]['close_button']
|
| 141 |
-
close_button.clicked.disconnect()
|
| 142 |
-
close_button.clicked.connect(lambda checked, idx=i: self.safely_close_tab(idx))
|
| 143 |
-
except Exception as e:
|
| 144 |
-
print(f"Error updating tabs: {e}")
|
| 145 |
-
|
| 146 |
-
def moveTab(self, from_index, to_index):
|
| 147 |
-
"""Override moveTab to safely handle tab movement"""
|
| 148 |
-
try:
|
| 149 |
-
if (0 <= from_index < self.count() and
|
| 150 |
-
0 <= to_index < self.count() and
|
| 151 |
-
from_index != 0 and
|
| 152 |
-
to_index != 0):
|
| 153 |
-
|
| 154 |
-
super().moveTab(from_index, to_index)
|
| 155 |
-
self._update_all_tabs()
|
| 156 |
-
|
| 157 |
-
except Exception as e:
|
| 158 |
-
print(f"Error moving tab: {e}")
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
class MainWindowView(QMainWindow):
|
| 162 |
def __init__(self, global_settings):
|
| 163 |
-
|
| 164 |
-
self
|
| 165 |
-
self.
|
| 166 |
self._init_ui()
|
| 167 |
self.oldPos = None
|
| 168 |
|
| 169 |
def _init_ui(self) -> None:
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
# Set position and size in a single operation
|
| 194 |
-
self.setGeometry(x, y, final_size.width(), final_size.height())
|
| 195 |
-
|
| 196 |
-
# Re-enable updates and show window
|
| 197 |
-
self.setUpdatesEnabled(True)
|
| 198 |
-
self.show()
|
| 199 |
-
self.repaint() # Force immediate repaint
|
| 200 |
-
|
| 201 |
-
self.logger.debug(f"Window initialized at position ({x}, {y}) with size {final_size}")
|
| 202 |
-
except Exception as e:
|
| 203 |
-
self._handle_init_error(e)
|
| 204 |
-
|
| 205 |
-
def _load_ui_file(self) -> None:
|
| 206 |
-
ui_file = os.path.join(self.global_settings.get_ui_dir_path(), "main_window.ui")
|
| 207 |
-
uic.loadUi(ui_file, self)
|
| 208 |
|
| 209 |
def _init_window_properties(self) -> None:
|
| 210 |
-
"""
|
| 211 |
-
Creates a frameless, translucent window without a toolbar.
|
| 212 |
-
"""
|
| 213 |
-
# Set window flags before other properties
|
| 214 |
self.setWindowFlags(Qt.WindowType.FramelessWindowHint)
|
| 215 |
self.setAttribute(Qt.WidgetAttribute.WA_TranslucentBackground)
|
| 216 |
self.setAttribute(Qt.WidgetAttribute.WA_NoSystemBackground)
|
| 217 |
-
|
| 218 |
toolbars = self.findChildren(QtWidgets.QToolBar)
|
| 219 |
for toolbar in toolbars:
|
| 220 |
toolbar.hide()
|
| 221 |
-
# Ensure window starts hidden
|
| 222 |
-
self.setVisible(False)
|
| 223 |
|
| 224 |
def _init_ui_elements(self) -> None:
|
| 225 |
-
# Initialize menu bar and custom title bar
|
| 226 |
self._init_menuBar()
|
| 227 |
self._init_custom_title_bar()
|
| 228 |
|
| 229 |
-
# Create main widget and layout
|
| 230 |
main_widget = QWidget()
|
| 231 |
main_layout = QVBoxLayout(main_widget)
|
| 232 |
main_layout.setContentsMargins(0, 0, 0, 0)
|
| 233 |
main_layout.setSpacing(0)
|
| 234 |
|
| 235 |
-
# Add title bar and divider
|
| 236 |
main_layout.addWidget(self.title_bar, 0)
|
| 237 |
main_layout.addWidget(self._init_divider(), 0)
|
| 238 |
|
|
@@ -242,16 +72,14 @@ class MainWindowView(QMainWindow):
|
|
| 242 |
tab_container_layout.setContentsMargins(0, 0, 0, 0)
|
| 243 |
tab_container_layout.setSpacing(0)
|
| 244 |
|
| 245 |
-
# Use the _add_new_tab method to add a new tab
|
| 246 |
-
# self._add_new_tab()
|
| 247 |
-
|
| 248 |
# Initialize and add CloseableTabWidget
|
| 249 |
self.tab_widget = CloseableTabWidget(self)
|
| 250 |
-
self.tab_widget.setSizePolicy(QtWidgets.QSizePolicy.Policy.Expanding,
|
|
|
|
| 251 |
self.tab_widget.setStyleSheet("""
|
| 252 |
QTabWidget::pane {
|
| 253 |
border: 1px solid #444444;
|
| 254 |
-
padding: 10px;
|
| 255 |
}
|
| 256 |
""")
|
| 257 |
tab_container_layout.addWidget(self.tab_widget)
|
|
@@ -271,40 +99,35 @@ class MainWindowView(QMainWindow):
|
|
| 271 |
self.action_open_NCBI = self._find_widget("actGoToNCBI", QAction)
|
| 272 |
|
| 273 |
def _find_widget(self, name: str, widget_type: type) -> Optional[QtWidgets.QWidget]:
|
|
|
|
| 274 |
widget = self.findChild(widget_type, name)
|
| 275 |
if widget is None:
|
| 276 |
-
self.
|
| 277 |
return widget
|
| 278 |
-
|
| 279 |
def _init_custom_title_bar(self) -> None:
|
| 280 |
self.title_bar = QWidget(self)
|
| 281 |
self.title_bar.setObjectName("custom_title_bar")
|
| 282 |
-
self.title_bar.setFixedHeight(32)
|
| 283 |
|
| 284 |
# Create the main horizontal layout for the title bar
|
| 285 |
layout = QHBoxLayout(self.title_bar)
|
| 286 |
-
layout.setContentsMargins(10, 0, 10, 0)
|
| 287 |
-
layout.setSpacing(5)
|
| 288 |
|
| 289 |
# ----- Window Control Buttons -----
|
| 290 |
-
button_font = QFont("Arial", 8)
|
| 291 |
-
|
| 292 |
self.minimize_window_button = QPushButton("-", self.title_bar)
|
| 293 |
self.minimize_window_button.setObjectName("minimize_window_button")
|
| 294 |
self.minimize_window_button.setFixedSize(20, 20)
|
| 295 |
-
self.minimize_window_button.setFont(button_font)
|
| 296 |
|
| 297 |
self.maximize_window_button = QPushButton("⛶", self.title_bar)
|
| 298 |
self.maximize_window_button.setObjectName("maximize_window_button")
|
| 299 |
self.maximize_window_button.setFixedSize(20, 20)
|
| 300 |
-
self.maximize_window_button.setFont(button_font)
|
| 301 |
|
| 302 |
self.close_window_button = QPushButton("✕", self.title_bar)
|
| 303 |
self.close_window_button.setObjectName("close_window_button")
|
| 304 |
self.close_window_button.setFixedSize(20, 20)
|
| 305 |
-
self.close_window_button.setFont(button_font)
|
| 306 |
|
| 307 |
-
# Apply a style to center the text vertically and horizontally
|
| 308 |
button_style = """
|
| 309 |
QPushButton {
|
| 310 |
padding: 0px;
|
|
@@ -343,7 +166,6 @@ class MainWindowView(QMainWindow):
|
|
| 343 |
right_layout.addStretch()
|
| 344 |
right_layout.addWidget(self.theme_toggle_button)
|
| 345 |
|
| 346 |
-
# ----- Synchronize Widths of Left and Right Widgets -----
|
| 347 |
# Adjust left_widget to calculate its required width
|
| 348 |
left_widget.adjustSize()
|
| 349 |
left_width = left_widget.sizeHint().width()
|
|
@@ -351,10 +173,8 @@ class MainWindowView(QMainWindow):
|
|
| 351 |
# Set right_widget's fixed width to match left_widget's width
|
| 352 |
right_widget.setFixedWidth(left_width)
|
| 353 |
|
| 354 |
-
# ----- Title Label -----
|
| 355 |
self.title_label = QLabel("CASPER", self.title_bar)
|
| 356 |
self.title_label.setObjectName("title_label")
|
| 357 |
-
self.title_label.setFont(QFont("Arial", 10, QFont.Weight.Bold))
|
| 358 |
self.title_label.setAlignment(Qt.AlignmentFlag.AlignCenter) # Center the text in the label
|
| 359 |
|
| 360 |
# Add Widgets to the Main Title Bar Layout
|
|
@@ -364,6 +184,11 @@ class MainWindowView(QMainWindow):
|
|
| 364 |
layout.addStretch(1)
|
| 365 |
layout.addWidget(right_widget)
|
| 366 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
|
| 368 |
def _init_divider(self):
|
| 369 |
divider = QFrame()
|
|
@@ -372,85 +197,26 @@ class MainWindowView(QMainWindow):
|
|
| 372 |
divider.setFrameShadow(QFrame.Shadow.Sunken)
|
| 373 |
return divider
|
| 374 |
|
| 375 |
-
# def _add_new_tab(self):
|
| 376 |
-
# new_tab = QWidget()
|
| 377 |
-
# layout = QVBoxLayout(new_tab)
|
| 378 |
-
|
| 379 |
-
# label = QLabel("This is a new tab", new_tab)
|
| 380 |
-
# layout.addWidget(label)
|
| 381 |
-
|
| 382 |
-
# new_tab_button = QPushButton("Open New Tab", new_tab)
|
| 383 |
-
# new_tab_button.clicked.connect(self._add_new_tab)
|
| 384 |
-
# layout.addWidget(new_tab_button)
|
| 385 |
-
# tab_index = self.tab_widget.addTab(new_tab, f"Tab {self.tab_widget.count() + 1}")
|
| 386 |
-
# self.tab_widget.setCurrentIndex(tab_index)
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
def _scale_ui(self):
|
| 390 |
-
"""Modified scale_ui to only handle sizing, not positioning"""
|
| 391 |
-
try:
|
| 392 |
-
screen = QtGui.QGuiApplication.primaryScreen()
|
| 393 |
-
screen_geometry = screen.geometry()
|
| 394 |
-
width = screen_geometry.width()
|
| 395 |
-
height = screen_geometry.height()
|
| 396 |
-
|
| 397 |
-
# Font scaling
|
| 398 |
-
self.centralWidget().setStyleSheet(f"font: 12pt 'Arial';")
|
| 399 |
-
|
| 400 |
-
if hasattr(self, 'title'):
|
| 401 |
-
scaled_title_font_size = int(30 * (width / 1920))
|
| 402 |
-
self.title.setStyleSheet(f"font: bold {scaled_title_font_size}pt 'Arial';")
|
| 403 |
-
|
| 404 |
-
# Calculate size only
|
| 405 |
-
scaledWidth = int((width * 575) / 1920)
|
| 406 |
-
scaledHeight = int((height * 400) / 1080)
|
| 407 |
-
|
| 408 |
-
# Ensure minimum size
|
| 409 |
-
self.adjustSize()
|
| 410 |
-
currentWidth = self.size().width()
|
| 411 |
-
currentHeight = self.size().height()
|
| 412 |
-
|
| 413 |
-
if scaledHeight < currentHeight:
|
| 414 |
-
scaledHeight = currentHeight
|
| 415 |
-
if scaledWidth < currentWidth:
|
| 416 |
-
scaledWidth = currentWidth
|
| 417 |
-
|
| 418 |
-
# Only resize, don't reposition
|
| 419 |
-
self.resize(scaledWidth, scaledHeight)
|
| 420 |
-
|
| 421 |
-
except Exception as e:
|
| 422 |
-
self.logger.error(f"Error in _scale_ui: {str(e)}")
|
| 423 |
-
|
| 424 |
def _handle_init_error(self, e: Exception) -> None:
|
| 425 |
error_msg = f"Error initializing MainWindowView: {str(e)}"
|
| 426 |
-
self.
|
| 427 |
-
show_error(self.
|
| 428 |
raise
|
| 429 |
|
| 430 |
def update_theme_icon(self) -> None:
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
self.
|
| 440 |
-
|
| 441 |
-
def mouseMoveEvent(self, event):
|
| 442 |
-
if self.oldPos:
|
| 443 |
-
delta = event.globalPosition().toPoint() - self.oldPos
|
| 444 |
-
self.move(self.x() + delta.x(), self.y() + delta.y())
|
| 445 |
-
self.oldPos = event.globalPosition().toPoint()
|
| 446 |
-
|
| 447 |
-
def mouseReleaseEvent(self, event):
|
| 448 |
-
if event.button() == Qt.MouseButton.LeftButton:
|
| 449 |
-
self.oldPos = None
|
| 450 |
|
| 451 |
def resizeEvent(self, event):
|
| 452 |
super().resizeEvent(event)
|
| 453 |
-
self.
|
| 454 |
|
| 455 |
def apply_theme(self):
|
| 456 |
themes = {
|
|
@@ -488,11 +254,8 @@ class MainWindowView(QMainWindow):
|
|
| 488 |
}
|
| 489 |
}
|
| 490 |
|
| 491 |
-
|
| 492 |
-
current_theme = self.global_settings.get_theme()
|
| 493 |
theme = themes["dark"] if current_theme == "dark" else themes["light"]
|
| 494 |
-
|
| 495 |
-
# Apply the selected theme using qdarktheme
|
| 496 |
qdarktheme.setup_theme(current_theme)
|
| 497 |
|
| 498 |
# Set the stylesheet
|
|
@@ -542,13 +305,19 @@ class MainWindowView(QMainWindow):
|
|
| 542 |
}}
|
| 543 |
""")
|
| 544 |
|
| 545 |
-
def
|
| 546 |
-
"""
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from PyQt6.QtWidgets import (
|
| 2 |
+
QMainWindow, QPushButton, QWidget, QVBoxLayout,
|
| 3 |
+
QHBoxLayout, QLabel, QFrame,
|
|
|
|
| 4 |
)
|
| 5 |
+
from PyQt6.QtGui import QIcon, QAction
|
| 6 |
+
from PyQt6.QtCore import Qt
|
| 7 |
from PyQt6 import uic, QtWidgets, QtCore, QtGui
|
| 8 |
+
from utils.ui import show_error
|
| 9 |
+
from utils.LoggingMixin import LoggingMixin
|
| 10 |
import os
|
| 11 |
from typing import Optional
|
|
|
|
| 12 |
import qdarktheme
|
| 13 |
+
from views.CloseableTabWidget import CloseableTabWidget
|
| 14 |
|
| 15 |
+
class MainWindowView(QMainWindow, LoggingMixin):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
def __init__(self, global_settings):
    """Initialise both base classes, keep the settings object and build the UI.

    Args:
        global_settings: Project settings object, stored as ``self.settings``.
    """
    QMainWindow.__init__(self)
    LoggingMixin.__init__(self)
    self.settings = global_settings
    # Set before _init_ui(): that call shows the window and binds the
    # title-bar mouse handlers, so the drag state should already exist
    # (presumably those handlers read it; their bodies are not all visible here).
    self.oldPos = None
    self._init_ui()
|
| 22 |
|
| 23 |
def _init_ui(self) -> None:
    """Load the window layout, apply the theme, then centre and show the window.

    Any failure is handed to ``_handle_init_error``, which logs it, shows an
    error dialog and raises the exception again.
    """
    self.log_method_call("_init_ui")

    try:
        screen = QtGui.QGuiApplication.primaryScreen()
        screen_geometry = screen.geometry()
        centerPoint = screen_geometry.center()

        # Load UI file
        ui_file = os.path.join(self.settings.get_ui_dir_path(), "main_window.ui")
        uic.loadUi(ui_file, self)
        self._init_window_properties()
        self._init_ui_elements()
        self.apply_theme()

        # Centre the window on the primary screen using its loaded size
        final_size = self.size()
        x = centerPoint.x() - (final_size.width() // 2)
        y = centerPoint.y() - (final_size.height() // 2)

        self.setGeometry(x, y, final_size.width(), final_size.height())
        self.setUpdatesEnabled(True)
        self.show()
        self.repaint()

        self.log_debug(f"Window initialized at position ({x}, {y}) with size {final_size}")
    except Exception as e:
        # Logs, informs the user, and re-raises.
        self._handle_init_error(e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
def _init_window_properties(self) -> None:
    """Make the window frameless with a translucent background and hide all toolbars."""
    self.setWindowFlags(Qt.WindowType.FramelessWindowHint)

    background_attributes = (
        Qt.WidgetAttribute.WA_TranslucentBackground,
        Qt.WidgetAttribute.WA_NoSystemBackground,
    )
    for attribute in background_attributes:
        self.setAttribute(attribute)

    # Hide every QToolBar found among the window's children.
    for bar in self.findChildren(QtWidgets.QToolBar):
        bar.hide()
|
|
|
|
|
|
|
| 56 |
|
| 57 |
def _init_ui_elements(self) -> None:
|
|
|
|
| 58 |
self._init_menuBar()
|
| 59 |
self._init_custom_title_bar()
|
| 60 |
|
|
|
|
| 61 |
main_widget = QWidget()
|
| 62 |
main_layout = QVBoxLayout(main_widget)
|
| 63 |
main_layout.setContentsMargins(0, 0, 0, 0)
|
| 64 |
main_layout.setSpacing(0)
|
| 65 |
|
|
|
|
| 66 |
main_layout.addWidget(self.title_bar, 0)
|
| 67 |
main_layout.addWidget(self._init_divider(), 0)
|
| 68 |
|
|
|
|
| 72 |
tab_container_layout.setContentsMargins(0, 0, 0, 0)
|
| 73 |
tab_container_layout.setSpacing(0)
|
| 74 |
|
|
|
|
|
|
|
|
|
|
| 75 |
# Initialize and add CloseableTabWidget
|
| 76 |
self.tab_widget = CloseableTabWidget(self)
|
| 77 |
+
self.tab_widget.setSizePolicy(QtWidgets.QSizePolicy.Policy.Expanding,
|
| 78 |
+
QtWidgets.QSizePolicy.Policy.Expanding)
|
| 79 |
self.tab_widget.setStyleSheet("""
|
| 80 |
QTabWidget::pane {
|
| 81 |
border: 1px solid #444444;
|
| 82 |
+
padding: 10px;
|
| 83 |
}
|
| 84 |
""")
|
| 85 |
tab_container_layout.addWidget(self.tab_widget)
|
|
|
|
| 99 |
self.action_open_NCBI = self._find_widget("actGoToNCBI", QAction)
|
| 100 |
|
| 101 |
def _find_widget(self, name: str, widget_type: type) -> Optional[QtWidgets.QWidget]:
    """Look up a child object by type and object name.

    Returns the object, or ``None`` (after logging a warning) when no child
    matches.
    """
    found = self.findChild(widget_type, name)
    if found is not None:
        return found
    self.log_warning(f"Widget '{name}' not found in UI file")
    return None
|
| 107 |
+
|
| 108 |
def _init_custom_title_bar(self) -> None:
|
| 109 |
self.title_bar = QWidget(self)
|
| 110 |
self.title_bar.setObjectName("custom_title_bar")
|
| 111 |
+
self.title_bar.setFixedHeight(32)
|
| 112 |
|
| 113 |
# Create the main horizontal layout for the title bar
|
| 114 |
layout = QHBoxLayout(self.title_bar)
|
| 115 |
+
layout.setContentsMargins(10, 0, 10, 0)
|
| 116 |
+
layout.setSpacing(5)
|
| 117 |
|
| 118 |
# ----- Window Control Buttons -----
|
|
|
|
|
|
|
| 119 |
self.minimize_window_button = QPushButton("-", self.title_bar)
|
| 120 |
self.minimize_window_button.setObjectName("minimize_window_button")
|
| 121 |
self.minimize_window_button.setFixedSize(20, 20)
|
|
|
|
| 122 |
|
| 123 |
self.maximize_window_button = QPushButton("⛶", self.title_bar)
|
| 124 |
self.maximize_window_button.setObjectName("maximize_window_button")
|
| 125 |
self.maximize_window_button.setFixedSize(20, 20)
|
|
|
|
| 126 |
|
| 127 |
self.close_window_button = QPushButton("✕", self.title_bar)
|
| 128 |
self.close_window_button.setObjectName("close_window_button")
|
| 129 |
self.close_window_button.setFixedSize(20, 20)
|
|
|
|
| 130 |
|
|
|
|
| 131 |
button_style = """
|
| 132 |
QPushButton {
|
| 133 |
padding: 0px;
|
|
|
|
| 166 |
right_layout.addStretch()
|
| 167 |
right_layout.addWidget(self.theme_toggle_button)
|
| 168 |
|
|
|
|
| 169 |
# Adjust left_widget to calculate its required width
|
| 170 |
left_widget.adjustSize()
|
| 171 |
left_width = left_widget.sizeHint().width()
|
|
|
|
| 173 |
# Set right_widget's fixed width to match left_widget's width
|
| 174 |
right_widget.setFixedWidth(left_width)
|
| 175 |
|
|
|
|
| 176 |
self.title_label = QLabel("CASPER", self.title_bar)
|
| 177 |
self.title_label.setObjectName("title_label")
|
|
|
|
| 178 |
self.title_label.setAlignment(Qt.AlignmentFlag.AlignCenter) # Center the text in the label
|
| 179 |
|
| 180 |
# Add Widgets to the Main Title Bar Layout
|
|
|
|
| 184 |
layout.addStretch(1)
|
| 185 |
layout.addWidget(right_widget)
|
| 186 |
|
| 187 |
+
# Add mouse tracking to the title bar
|
| 188 |
+
self.title_bar.mousePressEvent = self.mousePressEvent
|
| 189 |
+
self.title_bar.mouseMoveEvent = self.mouseMoveEvent
|
| 190 |
+
self.title_bar.mouseReleaseEvent = self.mouseReleaseEvent
|
| 191 |
+
self.title_bar.setMouseTracking(True)
|
| 192 |
|
| 193 |
def _init_divider(self):
|
| 194 |
divider = QFrame()
|
|
|
|
| 197 |
divider.setFrameShadow(QFrame.Shadow.Sunken)
|
| 198 |
return divider
|
| 199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
def _handle_init_error(self, e: Exception) -> None:
|
| 201 |
error_msg = f"Error initializing MainWindowView: {str(e)}"
|
| 202 |
+
self.log_error("_init_ui", e)
|
| 203 |
+
show_error(self.settings, "Initialization Error", error_msg)
|
| 204 |
raise
|
| 205 |
|
| 206 |
def update_theme_icon(self) -> None:
|
| 207 |
+
try:
|
| 208 |
+
icon_name = "dark_mode.png" if self.settings.get_theme() == "dark" else "light_mode.png"
|
| 209 |
+
icon_path = os.path.join(self.settings.get_assets_dir_path(), icon_name)
|
| 210 |
+
icon = QIcon(icon_path)
|
| 211 |
+
self.theme_toggle_button.setIcon(icon)
|
| 212 |
+
self.theme_toggle_button.setIconSize(QtCore.QSize(16, 16))
|
| 213 |
+
except Exception as e:
|
| 214 |
+
self.log_error("update_theme_icon", e)
|
| 215 |
+
show_error(self.settings, "Theme Error", "Failed to update theme icon")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
|
| 217 |
def resizeEvent(self, event):
|
| 218 |
super().resizeEvent(event)
|
| 219 |
+
self.log_debug(f"Window resized. New size: {self.size()}")
|
| 220 |
|
| 221 |
def apply_theme(self):
|
| 222 |
themes = {
|
|
|
|
| 254 |
}
|
| 255 |
}
|
| 256 |
|
| 257 |
+
current_theme = self.settings.get_theme()
|
|
|
|
| 258 |
theme = themes["dark"] if current_theme == "dark" else themes["light"]
|
|
|
|
|
|
|
| 259 |
qdarktheme.setup_theme(current_theme)
|
| 260 |
|
| 261 |
# Set the stylesheet
|
|
|
|
| 305 |
}}
|
| 306 |
""")
|
| 307 |
|
| 308 |
+
def mousePressEvent(self, event):
|
| 309 |
+
"""Handle mouse press events for window dragging"""
|
| 310 |
+
if event.button() == Qt.MouseButton.LeftButton:
|
| 311 |
+
self.oldPos = event.globalPosition().toPoint()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
|
| 313 |
+
def mouseMoveEvent(self, event):
|
| 314 |
+
"""Handle mouse move events for window dragging"""
|
| 315 |
+
if self.oldPos is not None:
|
| 316 |
+
delta = event.globalPosition().toPoint() - self.oldPos
|
| 317 |
+
self.move(self.x() + delta.x(), self.y() + delta.y())
|
| 318 |
+
self.oldPos = event.globalPosition().toPoint()
|
| 319 |
|
| 320 |
+
def mouseReleaseEvent(self, event):
|
| 321 |
+
"""Handle mouse release events for window dragging"""
|
| 322 |
+
if event.button() == Qt.MouseButton.LeftButton:
|
| 323 |
+
self.oldPos = None
|
|
@@ -1,12 +1,11 @@
|
|
| 1 |
from typing import Optional
|
| 2 |
from PyQt6 import QtWidgets, uic, QtGui
|
| 3 |
from PyQt6.QtWidgets import QTableWidgetItem, QAbstractItemView
|
| 4 |
-
from PyQt6.QtGui import QIcon
|
| 5 |
from PyQt6.QtCore import Qt
|
| 6 |
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg, NavigationToolbar2QT
|
| 7 |
from matplotlib.figure import Figure
|
| 8 |
from matplotlib.ticker import MaxNLocator
|
| 9 |
-
from utils.ui import show_error
|
| 10 |
|
| 11 |
class MultitargetingWindowView(QtWidgets.QMainWindow):
|
| 12 |
def __init__(self, global_settings):
|
|
|
|
| 1 |
from typing import Optional
|
| 2 |
from PyQt6 import QtWidgets, uic, QtGui
|
| 3 |
from PyQt6.QtWidgets import QTableWidgetItem, QAbstractItemView
|
|
|
|
| 4 |
from PyQt6.QtCore import Qt
|
| 5 |
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg, NavigationToolbar2QT
|
| 6 |
from matplotlib.figure import Figure
|
| 7 |
from matplotlib.ticker import MaxNLocator
|
| 8 |
+
from utils.ui import show_error
|
| 9 |
|
| 10 |
class MultitargetingWindowView(QtWidgets.QMainWindow):
|
| 11 |
def __init__(self, global_settings):
|
|
@@ -1,228 +0,0 @@
|
|
| 1 |
-
import sys, os
|
| 2 |
-
from PyQt5 import QtWidgets, uic, QtGui, QtCore, Qt
|
| 3 |
-
import models.GlobalSettings as GlobalSettings
|
| 4 |
-
from PyQt5.QtGui import QIntValidator
|
| 5 |
-
import traceback
|
| 6 |
-
import math
|
| 7 |
-
from utils.ui import show_message, show_error, scale_ui, center_ui
|
| 8 |
-
|
| 9 |
-
logger = GlobalSettings.logger
|
| 10 |
-
|
| 11 |
-
class NewEndonuclease(QtWidgets.QMainWindow):
|
| 12 |
-
def __init__(self):
|
| 13 |
-
print("Initializing NewEndonuclease class")
|
| 14 |
-
try:
|
| 15 |
-
super(NewEndonuclease, self).__init__()
|
| 16 |
-
uic.loadUi(GlobalSettings.appdir + 'ui/newendonuclease.ui', self)
|
| 17 |
-
self.setWindowIcon(Qt.QIcon(GlobalSettings.appdir + "cas9image.ico"))
|
| 18 |
-
self.setWindowTitle('New Endonuclease')
|
| 19 |
-
self.error = False
|
| 20 |
-
pamFlag = False
|
| 21 |
-
|
| 22 |
-
self.onList = []
|
| 23 |
-
self.offList = []
|
| 24 |
-
|
| 25 |
-
self.onList, self.offList = self.get_on_off_data() ### Call function to fill on- and off- data name lists
|
| 26 |
-
|
| 27 |
-
for name in self.onList: ### Add on-target names to drop-down
|
| 28 |
-
self.comboBox.addItem(str(name))
|
| 29 |
-
|
| 30 |
-
for name in self.offList: ### Add off-target names to drop-down
|
| 31 |
-
self.comboBox_2.addItem(str(name))
|
| 32 |
-
|
| 33 |
-
self.submit_button.clicked.connect(self.submit)
|
| 34 |
-
self.cancel_button.clicked.connect(self.cancel)
|
| 35 |
-
|
| 36 |
-
### Set up validators for input fields:
|
| 37 |
-
reg_ex1 = QtCore.QRegExp("[^/\\\\_]+") # No slashes or underscores
|
| 38 |
-
reg_ex2 = QtCore.QRegExp("[^/\\\\_\\s]+") # No slashes, underscores, or spaces
|
| 39 |
-
reg_ex3 = QtCore.QRegExp("[acdefghiklmnpqrstvwyACDEFGHIKLMNPQRSTVWY\S]+") # Only approved PAM characters and no spaces
|
| 40 |
-
input_validator1 = QtGui.QRegExpValidator(reg_ex1, self)
|
| 41 |
-
input_validator2 = QtGui.QRegExpValidator(reg_ex2, self)
|
| 42 |
-
input_validator3 = QtGui.QRegExpValidator(reg_ex3, self)
|
| 43 |
-
self.organism_name.setValidator(input_validator1)
|
| 44 |
-
self.abbreviation.setValidator(input_validator2)
|
| 45 |
-
self.pam_sequence.setValidator(input_validator3)
|
| 46 |
-
|
| 47 |
-
self.seed_length.setValidator(QIntValidator(0,30,self.seed_length))
|
| 48 |
-
self.five_length.setValidator(QIntValidator(0,20,self.five_length))
|
| 49 |
-
self.three_length.setValidator(QIntValidator(0,20,self.three_length))
|
| 50 |
-
|
| 51 |
-
groupbox_style = """
|
| 52 |
-
QGroupBox:title{subcontrol-origin: margin;
|
| 53 |
-
left: 10px;
|
| 54 |
-
padding: 0 5px 0 5px;}
|
| 55 |
-
QGroupBox#groupBox{border: 2px solid rgb(111,181,110);
|
| 56 |
-
border-radius: 9px;
|
| 57 |
-
font: bold 14pt 'Arial';
|
| 58 |
-
margin-top: 10px;}"""
|
| 59 |
-
|
| 60 |
-
self.groupBox.setStyleSheet(groupbox_style)
|
| 61 |
-
self.groupBox_2.setStyleSheet(groupbox_style.replace("groupBox","groupBox_2"))
|
| 62 |
-
self.groupBox_3.setStyleSheet(groupbox_style.replace("groupBox","groupBox_3"))
|
| 63 |
-
|
| 64 |
-
scale_ui(self, custom_scale_width=480, custom_scale_height=615)
|
| 65 |
-
except Exception as e:
|
| 66 |
-
show_error("Error initializing NewEndonuclease class.", e)
|
| 67 |
-
|
| 68 |
-
#helper function for writing new endo information to CASPERinfo - used by submit()
|
| 69 |
-
def writeNewEndonuclease(self, newEndonucleaseStr):
|
| 70 |
-
try:
|
| 71 |
-
with open(GlobalSettings.appdir + 'CASPERinfo', 'r') as f, open(GlobalSettings.appdir + "new_file", 'w+') as f1:
|
| 72 |
-
for line in f:
|
| 73 |
-
f1.write(line)
|
| 74 |
-
if 'ENDONUCLEASES' in line:
|
| 75 |
-
f1.write(newEndonucleaseStr + '\n') # Move f1.write(line) above, to write above instead
|
| 76 |
-
os.remove(GlobalSettings.appdir + "CASPERinfo")
|
| 77 |
-
os.rename(GlobalSettings.appdir + "new_file",
|
| 78 |
-
GlobalSettings.appdir + "CASPERinfo") # Rename the new file
|
| 79 |
-
except Exception as e:
|
| 80 |
-
show_error("Error in writeNewEndonuclease() in New Endonuclease.", e)
|
| 81 |
-
|
| 82 |
-
#submit new endo to CASPERinfo file
|
| 83 |
-
def submit(self):
|
| 84 |
-
try:
|
| 85 |
-
# This is executed when the button is pressed
|
| 86 |
-
name = str(self.organism_name.text())
|
| 87 |
-
abbr = str(self.abbreviation.text())
|
| 88 |
-
crisprtype = str(self.crispr_type.text())
|
| 89 |
-
seed_len = str(self.seed_length.text())
|
| 90 |
-
five_len = str(self.five_length.text())
|
| 91 |
-
three_len = str(self.three_length.text())
|
| 92 |
-
pam = str(self.pam_sequence.text()).upper()
|
| 93 |
-
### Check for multiple PAMs and format if present
|
| 94 |
-
if len(pam.split(','))>0:
|
| 95 |
-
pam = [x.strip() for x in pam.split(',')]
|
| 96 |
-
pam = ",".join(pam)
|
| 97 |
-
### Check for PAM directionality
|
| 98 |
-
if self.five_pam.isChecked():
|
| 99 |
-
pam_dir = str(5)
|
| 100 |
-
else:
|
| 101 |
-
pam_dir = str(3)
|
| 102 |
-
on_scoring = str(self.comboBox.currentText())
|
| 103 |
-
off_scoring = str(self.comboBox_2.currentText())
|
| 104 |
-
length = len(seed_len) + len(five_len) + len(three_len)
|
| 105 |
-
argument_list = [abbr, pam, five_len, seed_len, three_len, pam_dir, name, crisprtype, on_scoring, off_scoring]
|
| 106 |
-
validPAM = ('A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y')
|
| 107 |
-
self.error = False;
|
| 108 |
-
|
| 109 |
-
### Error checking for PAM alphabet
|
| 110 |
-
for letter in pam:
|
| 111 |
-
if (letter not in validPAM):
|
| 112 |
-
show_message(
|
| 113 |
-
fontSize=12,
|
| 114 |
-
icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 115 |
-
title="Invalid PAM",
|
| 116 |
-
message="Invalid characters in PAM Sequence."
|
| 117 |
-
)
|
| 118 |
-
return True
|
| 119 |
-
### Error checking for filling out all fields
|
| 120 |
-
for arg in argument_list:
|
| 121 |
-
if ';' in arg:
|
| 122 |
-
show_message(
|
| 123 |
-
fontSize=12,
|
| 124 |
-
icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 125 |
-
title="Invalid Semicolon",
|
| 126 |
-
message="Invalid character used: ';'."
|
| 127 |
-
)
|
| 128 |
-
return True
|
| 129 |
-
elif arg == "":
|
| 130 |
-
show_message(
|
| 131 |
-
fontSize=12,
|
| 132 |
-
icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 133 |
-
title="Empty Field",
|
| 134 |
-
message="Please fill in all fields."
|
| 135 |
-
)
|
| 136 |
-
return True
|
| 137 |
-
else:
|
| 138 |
-
pass
|
| 139 |
-
|
| 140 |
-
### Check for duplicate endo abbreviations
|
| 141 |
-
for key in GlobalSettings.mainWindow.organisms_to_endos:
|
| 142 |
-
endo = GlobalSettings.mainWindow.organisms_to_endos[key]
|
| 143 |
-
if abbr in endo:
|
| 144 |
-
show_message(
|
| 145 |
-
fontSize=12,
|
| 146 |
-
icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 147 |
-
title="Duplicate endo name.",
|
| 148 |
-
message="The given abbreviation already exists. Please choose a unique identifier."
|
| 149 |
-
)
|
| 150 |
-
return True
|
| 151 |
-
else:
|
| 152 |
-
pass
|
| 153 |
-
|
| 154 |
-
myString = ""
|
| 155 |
-
for i, arg in enumerate(argument_list):
|
| 156 |
-
if i == len(argument_list)-1: ### Last argument in list
|
| 157 |
-
myString += str(arg)
|
| 158 |
-
else:
|
| 159 |
-
myString += str(arg) + ";"
|
| 160 |
-
|
| 161 |
-
self.writeNewEndonuclease(myString)
|
| 162 |
-
|
| 163 |
-
### Refresh endonuclease dropdown in New Genome
|
| 164 |
-
GlobalSettings.mainWindow.newGenome.fillEndo()
|
| 165 |
-
|
| 166 |
-
self.clear_all()
|
| 167 |
-
self.close()
|
| 168 |
-
except Exception as e:
|
| 169 |
-
show_error("Error in submit() in New Endonuclease.", e)
|
| 170 |
-
|
| 171 |
-
#cancel and close window
|
| 172 |
-
def cancel(self):
|
| 173 |
-
try:
|
| 174 |
-
self.clear_all()
|
| 175 |
-
self.close()
|
| 176 |
-
except Exception as e:
|
| 177 |
-
show_error("Error in cancel() in New Endonuclease.", e)
|
| 178 |
-
|
| 179 |
-
# This function clears all of the line edits
|
| 180 |
-
def clear_all(self):
|
| 181 |
-
try:
|
| 182 |
-
self.organism_name.clear()
|
| 183 |
-
self.abbreviation.clear()
|
| 184 |
-
self.crispr_type.clear()
|
| 185 |
-
self.seed_length.clear()
|
| 186 |
-
self.five_length.clear()
|
| 187 |
-
self.three_length.clear()
|
| 188 |
-
self.pam_sequence.clear()
|
| 189 |
-
except Exception as e:
|
| 190 |
-
show_error("Error in clear_all() in New Endonuclease.", e)
|
| 191 |
-
|
| 192 |
-
# This function parses CASPERinfo to return the names (in lists) of all on-target and off-target scoring data
|
| 193 |
-
def get_on_off_data(self):
|
| 194 |
-
try:
|
| 195 |
-
filename = GlobalSettings.appdir + "CASPERinfo"
|
| 196 |
-
retList_on = []
|
| 197 |
-
retList_off = []
|
| 198 |
-
with open(filename, 'r') as f:
|
| 199 |
-
lines = f.readlines()
|
| 200 |
-
for i, line in enumerate(lines):
|
| 201 |
-
line = str(line)
|
| 202 |
-
if "ON-TARGET DATA" in line:
|
| 203 |
-
index = i
|
| 204 |
-
while "-----" not in line:
|
| 205 |
-
if "DATA:" in line:
|
| 206 |
-
retList_on.append(line.split("DATA:")[-1].strip()) ### Append name of scoring data to on-target name list
|
| 207 |
-
line = lines[index+1]
|
| 208 |
-
index += 1
|
| 209 |
-
else:
|
| 210 |
-
line = lines[index+1]
|
| 211 |
-
index += 1
|
| 212 |
-
continue
|
| 213 |
-
elif "OFF-TARGET MATRICES" in line:
|
| 214 |
-
index = i
|
| 215 |
-
while "-----" not in line:
|
| 216 |
-
if "MATRIX:" in line:
|
| 217 |
-
retList_off.append(line.split("MATRIX:")[-1].strip()) ### Append name of scoring data to off-target name list
|
| 218 |
-
line = lines[index+1]
|
| 219 |
-
index += 1
|
| 220 |
-
else:
|
| 221 |
-
line = lines[index+1]
|
| 222 |
-
index += 1
|
| 223 |
-
continue
|
| 224 |
-
else:
|
| 225 |
-
continue
|
| 226 |
-
return retList_on, retList_off
|
| 227 |
-
except Exception as e:
|
| 228 |
-
show_error("Error in get_on_off_data() in New Endonuclease.", e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -1,705 +0,0 @@
|
|
| 1 |
-
from ast import Global
|
| 2 |
-
import os
|
| 3 |
-
from PyQt5 import QtWidgets, uic, QtGui, QtCore, Qt
|
| 4 |
-
import models.GlobalSettings as GlobalSettings
|
| 5 |
-
from functools import partial
|
| 6 |
-
from utils.Algorithms import SeqTranslate
|
| 7 |
-
import webbrowser
|
| 8 |
-
import platform
|
| 9 |
-
import traceback
|
| 10 |
-
import math
|
| 11 |
-
from utils.ui import show_message, show_error, scale_ui, center_ui
|
| 12 |
-
from utils.web import ncbi_page, repo_page
|
| 13 |
-
|
| 14 |
-
logger = GlobalSettings.logger
|
| 15 |
-
|
| 16 |
-
def iter_except(function, exception):
|
| 17 |
-
"""Works like builtin 2-argument `iter()`, but stops on `exception`."""
|
| 18 |
-
try:
|
| 19 |
-
while True:
|
| 20 |
-
yield function()
|
| 21 |
-
except exception:
|
| 22 |
-
return
|
| 23 |
-
|
| 24 |
-
#UI prompt for when the user has finished running jobs in new genome to allow them to choose where the want to proceed
|
| 25 |
-
class goToPrompt(QtWidgets.QMainWindow):
|
| 26 |
-
def __init__(self):
|
| 27 |
-
try:
|
| 28 |
-
super(goToPrompt, self).__init__()
|
| 29 |
-
uic.loadUi(GlobalSettings.appdir + 'ui/newgenomenavigationpage.ui', self)
|
| 30 |
-
|
| 31 |
-
groupbox_style = """
|
| 32 |
-
QGroupBox:title{subcontrol-origin: margin;
|
| 33 |
-
left: 10px;
|
| 34 |
-
padding: 0 5px 0 5px;}
|
| 35 |
-
QGroupBox#groupBox{border: 2px solid rgb(111,181,110);
|
| 36 |
-
border-radius: 9px;
|
| 37 |
-
font: bold 14pt 'Arial';
|
| 38 |
-
margin-top: 10px;}"""
|
| 39 |
-
self.groupBox.setStyleSheet(groupbox_style)
|
| 40 |
-
scale_ui(self, custom_scale_width=575, custom_scale_height=175)
|
| 41 |
-
self.setWindowTitle("New Genome")
|
| 42 |
-
self.setWindowIcon(Qt.QIcon(GlobalSettings.appdir + "cas9image.ico"))
|
| 43 |
-
self.hide()
|
| 44 |
-
|
| 45 |
-
except Exception as e:
|
| 46 |
-
show_error("Unable to initialize goToPrompt class in New Genome.", e)
|
| 47 |
-
|
| 48 |
-
#New genome class to allow users to generate new CSPR files
|
| 49 |
-
class NewGenome(QtWidgets.QMainWindow):
|
| 50 |
-
def __init__(self, info_path):
|
| 51 |
-
try:
|
| 52 |
-
super(NewGenome, self).__init__()
|
| 53 |
-
uic.loadUi(GlobalSettings.appdir + 'ui/NewGenome.ui', self)
|
| 54 |
-
self.setWindowTitle('New Genome')
|
| 55 |
-
self.setWindowTitle('New Genome')
|
| 56 |
-
self.info_path = info_path
|
| 57 |
-
|
| 58 |
-
#---Style Modifications---#
|
| 59 |
-
|
| 60 |
-
groupbox_style = """
|
| 61 |
-
QGroupBox:title{subcontrol-origin: margin;
|
| 62 |
-
left: 10px;
|
| 63 |
-
padding: 0 5px 0 5px;}
|
| 64 |
-
QGroupBox#Step1{border: 2px solid rgb(111,181,110);
|
| 65 |
-
border-radius: 9px;
|
| 66 |
-
font: bold 14pt 'Arial';
|
| 67 |
-
margin-top: 10px;}"""
|
| 68 |
-
|
| 69 |
-
self.Step1.setStyleSheet(groupbox_style)
|
| 70 |
-
self.Step2.setStyleSheet(groupbox_style.replace("Step1","Step2"))
|
| 71 |
-
self.Step3.setStyleSheet(groupbox_style.replace("Step1","Step3"))
|
| 72 |
-
|
| 73 |
-
#---Button Modifications---#
|
| 74 |
-
|
| 75 |
-
self.setWindowIcon(Qt.QIcon(GlobalSettings.appdir + "cas9image.ico"))
|
| 76 |
-
self.resetButton.clicked.connect(self.reset)
|
| 77 |
-
self.submitButton.clicked.connect(self.submit)
|
| 78 |
-
self.browseForFile.clicked.connect(self.selectFasta)
|
| 79 |
-
self.remove_job.clicked.connect(self.remove_from_queue)
|
| 80 |
-
self.output_browser.setText("Waiting for program initiation...")
|
| 81 |
-
self.contButton.clicked.connect(self.continue_to_main)
|
| 82 |
-
|
| 83 |
-
self.comboBoxEndo.currentIndexChanged.connect(self.endo_settings)
|
| 84 |
-
|
| 85 |
-
self.runButton.clicked.connect(self.run_jobs_wrapper)
|
| 86 |
-
self.clearButton.clicked.connect(self.clear_all)
|
| 87 |
-
|
| 88 |
-
self.JobsQueue = [] # holds Job classes.
|
| 89 |
-
self.check_strings = []
|
| 90 |
-
self.Endos = dict()
|
| 91 |
-
self.file = ""
|
| 92 |
-
|
| 93 |
-
self.process = QtCore.QProcess()
|
| 94 |
-
self.process.setProcessChannelMode(QtCore.QProcess.MergedChannels)
|
| 95 |
-
self.process.finished.connect(self.upon_process_finishing)
|
| 96 |
-
self.seqTrans = SeqTranslate()
|
| 97 |
-
self.exit = False
|
| 98 |
-
|
| 99 |
-
self.first = False
|
| 100 |
-
#show functionalities on window
|
| 101 |
-
self.fillEndo()
|
| 102 |
-
|
| 103 |
-
self.num_chromo_next = False
|
| 104 |
-
|
| 105 |
-
#Jobs Table
|
| 106 |
-
self.job_Table.setShowGrid(False)
|
| 107 |
-
self.job_Table.horizontalHeader().setSectionsClickable(True)
|
| 108 |
-
self.job_Table.setSelectionBehavior(QtWidgets.QAbstractItemView.SelectRows)
|
| 109 |
-
self.job_Table.setEditTriggers(QtWidgets.QAbstractItemView.NoEditTriggers)
|
| 110 |
-
self.job_Table.setSelectionMode(QtWidgets.QAbstractItemView.MultiSelection)
|
| 111 |
-
self.job_Table.setSizeAdjustPolicy(QtWidgets.QAbstractScrollArea.AdjustToContents)
|
| 112 |
-
self.fin_index=0
|
| 113 |
-
|
| 114 |
-
self.mwfg = self.frameGeometry() ##Center window
|
| 115 |
-
self.cp = QtWidgets.QDesktopWidget().availableGeometry().center() ##Center window
|
| 116 |
-
self.total_chrom_count = 0
|
| 117 |
-
self.perc_increase = 0
|
| 118 |
-
self.progress = 0
|
| 119 |
-
|
| 120 |
-
#toolbar button actions
|
| 121 |
-
self.visit_repo.triggered.connect(repo_page)
|
| 122 |
-
self.go_ncbi.triggered.connect(ncbi_page)
|
| 123 |
-
|
| 124 |
-
self.comboBoxEndo.currentIndexChanged.connect(self.changeEndos)
|
| 125 |
-
|
| 126 |
-
### NCBI tool
|
| 127 |
-
self.NCBI_File_Search.clicked.connect(self.open_ncbi_tool)
|
| 128 |
-
|
| 129 |
-
self.seed_length.setEnabled(False)
|
| 130 |
-
self.five_length.setEnabled(False)
|
| 131 |
-
self.three_length.setEnabled(False)
|
| 132 |
-
self.repeats_box.setEnabled(False)
|
| 133 |
-
|
| 134 |
-
### User prompt class
|
| 135 |
-
self.goToPrompt = goToPrompt()
|
| 136 |
-
self.goToPrompt.goToMain.clicked.connect(self.continue_to_main)
|
| 137 |
-
self.goToPrompt.goToMT.clicked.connect(self.continue_to_MT)
|
| 138 |
-
self.goToPrompt.goToPop.clicked.connect(self.continue_to_pop)
|
| 139 |
-
|
| 140 |
-
self.orgName.setFocus()
|
| 141 |
-
|
| 142 |
-
### Connect New endonuclease to New Genome
|
| 143 |
-
self.actionUpload_New_Endonuclease.triggered.connect(self.launch_newEndonuclease)
|
| 144 |
-
|
| 145 |
-
### Set up validators for input fields:
|
| 146 |
-
reg_ex1 = QtCore.QRegExp("[^/\\\\_]+") # No slashes or underscores
|
| 147 |
-
reg_ex2 = QtCore.QRegExp("\\S+")
|
| 148 |
-
input_validator1 = QtGui.QRegExpValidator(reg_ex1, self)
|
| 149 |
-
input_validator2 = QtGui.QRegExpValidator(reg_ex2, self)
|
| 150 |
-
self.orgName.setValidator(input_validator1)
|
| 151 |
-
self.strainName.setValidator(input_validator1)
|
| 152 |
-
self.orgCode.setValidator(input_validator2)
|
| 153 |
-
|
| 154 |
-
scale_ui(self, custom_scale_width=850, custom_scale_height=750)
|
| 155 |
-
self.first_show = True
|
| 156 |
-
except Exception as e:
|
| 157 |
-
show_error("Error initializing New Genome class.", e)
|
| 158 |
-
|
| 159 |
-
def launch_newEndonuclease(self):
|
| 160 |
-
try:
|
| 161 |
-
GlobalSettings.mainWindow.getData()
|
| 162 |
-
GlobalSettings.mainWindow.newEndonuclease.centerUI()
|
| 163 |
-
GlobalSettings.mainWindow.newEndonuclease.show()
|
| 164 |
-
GlobalSettings.mainWindow.newEndonuclease.activateWindow()
|
| 165 |
-
except Exception as e:
|
| 166 |
-
show_error("Error in launch_newEndonuclease() in New Genome.", e)
|
| 167 |
-
|
| 168 |
-
#open the ncbi search tool window
|
| 169 |
-
def open_ncbi_tool(self):
|
| 170 |
-
try:
|
| 171 |
-
#center ncbi on current screen
|
| 172 |
-
if GlobalSettings.mainWindow.ncbi.first_show == True:
|
| 173 |
-
GlobalSettings.mainWindow.ncbi.first_show = False
|
| 174 |
-
GlobalSettings.mainWindow.ncbi.centerUI()
|
| 175 |
-
if self.orgName.text() != "":
|
| 176 |
-
GlobalSettings.mainWindow.ncbi.organism_line_edit.setText(self.orgName.text())
|
| 177 |
-
if self.strainName.text() != "":
|
| 178 |
-
GlobalSettings.mainWindow.ncbi.infra_name_line_edit.setText(self.strainName.text())
|
| 179 |
-
GlobalSettings.mainWindow.ncbi.show()
|
| 180 |
-
GlobalSettings.mainWindow.ncbi.activateWindow()
|
| 181 |
-
except Exception as e:
|
| 182 |
-
show_error("Error in open_ncbi_tool() in New Genome.", e)
|
| 183 |
-
|
| 184 |
-
def remove_from_queue(self):
|
| 185 |
-
try:
|
| 186 |
-
while(True):
|
| 187 |
-
indexes = self.job_Table.selectionModel().selectedRows()
|
| 188 |
-
if len(indexes) == 0:
|
| 189 |
-
break
|
| 190 |
-
self.job_Table.removeRow(indexes[0].row())
|
| 191 |
-
except Exception as e:
|
| 192 |
-
show_error("Error in remove_from_queue() in New Genome.", e)
|
| 193 |
-
|
| 194 |
-
#prompt user with file browser to select fasta/fna files
|
| 195 |
-
def selectFasta(self):
|
| 196 |
-
try:
|
| 197 |
-
filed = QtWidgets.QFileDialog()
|
| 198 |
-
myFile = QtWidgets.QFileDialog.getOpenFileName(filed, "Choose a File")
|
| 199 |
-
if (myFile[0] != ""):
|
| 200 |
-
if not myFile[0].endswith(".fa") and not myFile[0].endswith(".fna") and not myFile[0].endswith(".fasta"):
|
| 201 |
-
show_message(
|
| 202 |
-
fontSize=12,
|
| 203 |
-
icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 204 |
-
title="File Selection Error",
|
| 205 |
-
message="You have selected an incorrect type of file. Please choose a FASTA/FNA file."
|
| 206 |
-
)
|
| 207 |
-
return
|
| 208 |
-
else:
|
| 209 |
-
self.file = myFile[0]
|
| 210 |
-
self.selectedFile.setText(str(myFile[0]))
|
| 211 |
-
except Exception as e:
|
| 212 |
-
show_error("Error in selectFasta() in New Genome.", e)
|
| 213 |
-
|
| 214 |
-
#submit jobs to queue
|
| 215 |
-
def submit(self):
|
| 216 |
-
try:
|
| 217 |
-
warning = ""
|
| 218 |
-
if len(self.orgName.text()) == 0:
|
| 219 |
-
warning = warning + "You need to include the organism's name."
|
| 220 |
-
if len(self.file) == 0:
|
| 221 |
-
warning = warning + "You need to select a file."
|
| 222 |
-
if len(warning) != 0:
|
| 223 |
-
show_message(
|
| 224 |
-
fontSize=12,
|
| 225 |
-
icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 226 |
-
title="Required Information",
|
| 227 |
-
message=warning
|
| 228 |
-
)
|
| 229 |
-
return
|
| 230 |
-
if len(self.strainName.text()) == 0:
|
| 231 |
-
warning = warning + "\nIt is recommended to include the organism's subspecies/strain."
|
| 232 |
-
if len(self.orgCode.text()) == 0:
|
| 233 |
-
warning = warning + "\nYou must include an organism code."
|
| 234 |
-
if len(warning) != 0:
|
| 235 |
-
msgBox = QtWidgets.QMessageBox()
|
| 236 |
-
msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
|
| 237 |
-
msgBox.setIcon(QtWidgets.QMessageBox.Icon.Question)
|
| 238 |
-
msgBox.setWindowTitle("Missing Information")
|
| 239 |
-
msgBox.setText(warning + "\n\nDo you wish to continue without including this information?")
|
| 240 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Yes)
|
| 241 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.No)
|
| 242 |
-
msgBox.exec()
|
| 243 |
-
|
| 244 |
-
if msgBox.result() == QtWidgets.QMessageBox.No:
|
| 245 |
-
return
|
| 246 |
-
|
| 247 |
-
#endo, pam, repeats, directionality, five length, seed length, three length, orgcode, output path, CASPERinfo path, fna path, orgName, notes, on target matrix
|
| 248 |
-
args = self.Endos[self.comboBoxEndo.currentText()][0]
|
| 249 |
-
args += " " + self.Endos[self.comboBoxEndo.currentText()][1]
|
| 250 |
-
if self.mt.isChecked():
|
| 251 |
-
args += " " + "TRUE"
|
| 252 |
-
else:
|
| 253 |
-
args += " " + "FALSE"
|
| 254 |
-
|
| 255 |
-
if self.Endos[self.comboBoxEndo.currentText()][5] == "3":
|
| 256 |
-
args += " " + "FALSE"
|
| 257 |
-
else:
|
| 258 |
-
args += " " + "TRUE"
|
| 259 |
-
|
| 260 |
-
if self.repeats_box.isChecked():
|
| 261 |
-
args += " " + "TRUE"
|
| 262 |
-
else:
|
| 263 |
-
args += " " + "FALSE"
|
| 264 |
-
|
| 265 |
-
args += " " + self.Endos[self.comboBoxEndo.currentText()][2]
|
| 266 |
-
args += " " + self.Endos[self.comboBoxEndo.currentText()][3]
|
| 267 |
-
args += " " + self.Endos[self.comboBoxEndo.currentText()][4]
|
| 268 |
-
args += " " + self.orgCode.text()
|
| 269 |
-
if platform.system() == 'Windows':
|
| 270 |
-
args += " " + '"' + GlobalSettings.CSPR_DB.replace("/","\\") + '\\"'
|
| 271 |
-
args += " " + '"' + GlobalSettings.appdir.replace("/","\\") + "CASPERinfo" + '"'
|
| 272 |
-
args += " " + '"' + self.file.replace("/","\\") + '"'
|
| 273 |
-
else:
|
| 274 |
-
args += " " + '"' + GlobalSettings.CSPR_DB.replace("\\","/") + '/"'
|
| 275 |
-
args += " " + '"' + GlobalSettings.appdir.replace("\\","/") + "CASPERinfo" + '"'
|
| 276 |
-
args += " " + '"' + self.file.replace("\\","/") + '"'
|
| 277 |
-
|
| 278 |
-
args += " " + '"' + self.orgName.text() + " " + self.strainName.text() + '"'
|
| 279 |
-
args += " " + '"' + "notes" + '"'
|
| 280 |
-
args += " " + '"DATA:' + self.Endos[self.comboBoxEndo.currentText()][6] + '"'
|
| 281 |
-
|
| 282 |
-
tmp = self.orgName.text()+ " " + self.strainName.text() + " " + self.Endos[self.comboBoxEndo.currentText()][0] + " " + self.orgCode.text()
|
| 283 |
-
if tmp in self.check_strings:
|
| 284 |
-
show_message(
|
| 285 |
-
fontSize=12,
|
| 286 |
-
icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 287 |
-
title="Duplicate Entry",
|
| 288 |
-
message="You have submitted a duplicate entry. Consider changing the organism code or strain name to differentiate closely related strains."
|
| 289 |
-
)
|
| 290 |
-
return
|
| 291 |
-
name = self.orgCode.text() + "_" + str(self.Endos[self.comboBoxEndo.currentText()][0])
|
| 292 |
-
rowPosition = self.job_Table.rowCount()
|
| 293 |
-
self.job_Table.insertRow(rowPosition)
|
| 294 |
-
item = QtWidgets.QTableWidgetItem(name)
|
| 295 |
-
item.setTextAlignment(QtCore.Qt.AlignHCenter)
|
| 296 |
-
self.job_Table.setItem(rowPosition, 0, item)
|
| 297 |
-
self.check_strings.append(tmp)
|
| 298 |
-
self.JobsQueue.append(args)
|
| 299 |
-
except Exception as e:
|
| 300 |
-
show_error("Error in submit() in New Genome.", e)
|
| 301 |
-
|
| 302 |
-
#fill the endo dropdown
|
| 303 |
-
def fillEndo(self):
|
| 304 |
-
try:
|
| 305 |
-
#disconnect signal
|
| 306 |
-
try:
|
| 307 |
-
self.comboBoxEndo.currentIndexChanged.disconnect()
|
| 308 |
-
except:
|
| 309 |
-
pass
|
| 310 |
-
|
| 311 |
-
#clear out the endo box
|
| 312 |
-
self.comboBoxEndo.clear()
|
| 313 |
-
|
| 314 |
-
f = open(GlobalSettings.appdir + "CASPERinfo")
|
| 315 |
-
while True:
|
| 316 |
-
line = f.readline()
|
| 317 |
-
if line.startswith('ENDONUCLEASES'):
|
| 318 |
-
while True:
|
| 319 |
-
line = f.readline()
|
| 320 |
-
if (line[0] == "-"):
|
| 321 |
-
break
|
| 322 |
-
line_tokened = line.split(";")
|
| 323 |
-
if len(line_tokened) == 10:
|
| 324 |
-
endo = line_tokened[0]
|
| 325 |
-
# Checking to see if there is more than one pam sequence in the list
|
| 326 |
-
if line_tokened[1].find(",") != -1:
|
| 327 |
-
p_pam = line_tokened[1].split(",")[0]
|
| 328 |
-
else:
|
| 329 |
-
p_pam = line_tokened[1]
|
| 330 |
-
five_length = line_tokened[2]
|
| 331 |
-
seed_length = line_tokened[3]
|
| 332 |
-
three_length = line_tokened[4]
|
| 333 |
-
dir = line_tokened[5]
|
| 334 |
-
on_target_data = line_tokened[8]
|
| 335 |
-
self.Endos[endo + " - PAM: " + p_pam] = (endo, p_pam, five_length, seed_length, three_length, dir, on_target_data)
|
| 336 |
-
break
|
| 337 |
-
f.close()
|
| 338 |
-
self.comboBoxEndo.addItems(self.Endos.keys())
|
| 339 |
-
key = list(self.Endos.keys())[0]
|
| 340 |
-
self.seed_length.setText(self.Endos[key][3])
|
| 341 |
-
self.five_length.setText(self.Endos[key][2])
|
| 342 |
-
self.three_length.setText(self.Endos[key][4])
|
| 343 |
-
|
| 344 |
-
#reconnect signal
|
| 345 |
-
self.comboBoxEndo.currentIndexChanged.connect(self.changeEndos)
|
| 346 |
-
except Exception as e:
|
| 347 |
-
show_error("Error in fillEndo() in New Genome.", e)
|
| 348 |
-
|
| 349 |
-
#event handler for endo changing - update endo length data
|
| 350 |
-
def changeEndos(self):
|
| 351 |
-
try:
|
| 352 |
-
key = str(self.comboBoxEndo.currentText())
|
| 353 |
-
self.seed_length.setText(self.Endos[key][3])
|
| 354 |
-
self.five_length.setText(self.Endos[key][2])
|
| 355 |
-
self.three_length.setText(self.Endos[key][4])
|
| 356 |
-
except Exception as e:
|
| 357 |
-
show_error("Error in changeEndos() in New Genome.", e)
|
| 358 |
-
|
| 359 |
-
#check if endo is 3' or 5'
|
| 360 |
-
def endo_settings(self):
|
| 361 |
-
try:
|
| 362 |
-
# check the if it's 3' or 5', and check the box accordingly
|
| 363 |
-
if int(self.seqTrans.endo_info[self.Endos[self.comboBoxEndo.currentText()][0]][3]) == 3:
|
| 364 |
-
self.pamBox.setChecked(0)
|
| 365 |
-
elif int(self.seqTrans.endo_info[self.Endos[self.comboBoxEndo.currentText()][0]][3]) == 5:
|
| 366 |
-
self.pamBox.setChecked(1)
|
| 367 |
-
except Exception as e:
|
| 368 |
-
show_error("Error in endo_settings() in New Genome.", e)
|
| 369 |
-
|
| 370 |
-
#wrapper for running jobs
|
| 371 |
-
def run_jobs_wrapper(self):
|
| 372 |
-
try:
|
| 373 |
-
self.indexes = []
|
| 374 |
-
self.job_Table.selectAll()
|
| 375 |
-
indexes = self.job_Table.selectionModel().selectedRows()
|
| 376 |
-
for index in sorted(indexes):
|
| 377 |
-
if self.job_Table.item(index.row(), 0).text() != "":
|
| 378 |
-
self.indexes.append(index.row())
|
| 379 |
-
self.run_job()
|
| 380 |
-
except Exception as e:
|
| 381 |
-
show_error("Error in run_jobs_wrapper() in New Genome.", e)
|
| 382 |
-
|
| 383 |
-
#run job in queue
|
| 384 |
-
def run_job(self):
|
| 385 |
-
try:
|
| 386 |
-
if len(self.indexes) > 0:
|
| 387 |
-
self.progressBar.setValue(0)
|
| 388 |
-
self.progress = 0
|
| 389 |
-
row_index = self.indexes[0]
|
| 390 |
-
name = self.job_Table.item(row_index, 0).text()
|
| 391 |
-
item = QtWidgets.QTableWidgetItem(name)
|
| 392 |
-
item.setTextAlignment(QtCore.Qt.AlignHCenter)
|
| 393 |
-
self.job_Table.setItem(row_index, 1, item)
|
| 394 |
-
self.job_Table.setItem(row_index, 0, QtWidgets.QTableWidgetItem(""))
|
| 395 |
-
|
| 396 |
-
def output_stdout(p):
|
| 397 |
-
line = str(p.readAll())
|
| 398 |
-
line = line[2:]
|
| 399 |
-
line = line[:len(line) - 1]
|
| 400 |
-
for lines in line.split(r"\n"):
|
| 401 |
-
lines = lines.rstrip("\n")
|
| 402 |
-
lines = lines.rstrip("\r")
|
| 403 |
-
lines = lines.rstrip(r"\n")
|
| 404 |
-
lines = lines.rstrip(r"\r")
|
| 405 |
-
lines = lines.rstrip("\r\n")
|
| 406 |
-
lines = lines.rstrip(r"\r\n")
|
| 407 |
-
if lines != "":
|
| 408 |
-
if lines.find("Number of Chromosomes/Scaffolds") != -1:
|
| 409 |
-
copy = lines
|
| 410 |
-
copy = copy.replace(" ","")
|
| 411 |
-
copy = copy[copy.find(":")+1:]
|
| 412 |
-
self.total_chrom_count = int(copy)
|
| 413 |
-
self.perc_increase = ((1 / (2 * self.total_chrom_count)) * 70)
|
| 414 |
-
self.progressBar.setValue(20)
|
| 415 |
-
self.progress = 20
|
| 416 |
-
elif lines.find("complete.") != -1:
|
| 417 |
-
self.progress += self.perc_increase
|
| 418 |
-
self.progressBar.setValue(int(self.progress))
|
| 419 |
-
elif lines.find("Processing Targets.") != -1:
|
| 420 |
-
self.progress = 70
|
| 421 |
-
self.progressBar.setValue(int(self.progress))
|
| 422 |
-
elif lines.find("Writing out uniques.") != -1:
|
| 423 |
-
self.progress = 90
|
| 424 |
-
self.progressBar.setValue(int(self.progress))
|
| 425 |
-
elif lines.find("Writing out repeats.") != -1:
|
| 426 |
-
self.progress = 95
|
| 427 |
-
self.progressBar.setValue(int(self.progress))
|
| 428 |
-
elif lines == "Finished.":
|
| 429 |
-
self.progress = 100
|
| 430 |
-
self.progressBar.setValue(int(self.progress))
|
| 431 |
-
self.output_browser.append(lines)
|
| 432 |
-
|
| 433 |
-
job_args = self.JobsQueue[row_index]
|
| 434 |
-
if platform.system() == 'Windows':
|
| 435 |
-
program = '"' + GlobalSettings.appdir + "SeqFinderFolder/Casper_Seq_Finder_Win.exe" + '" '
|
| 436 |
-
elif platform.system() == 'Linux':
|
| 437 |
-
program = '"' + GlobalSettings.appdir + "SeqFinderFolder/Casper_Seq_Finder_Lin" + '" '
|
| 438 |
-
else:
|
| 439 |
-
program = '"' + GlobalSettings.appdir + "SeqFinderFolder/Casper_Seq_Finder_Mac" + '" '
|
| 440 |
-
program += job_args
|
| 441 |
-
self.process.readyReadStandardOutput.connect(partial(output_stdout, self.process))
|
| 442 |
-
self.process.start(program)
|
| 443 |
-
else:
|
| 444 |
-
show_message(
|
| 445 |
-
fontSize=12,
|
| 446 |
-
icon=QtWidgets.QMessageBox.Icon.Critical,
|
| 447 |
-
title="No Jobs To Run",
|
| 448 |
-
message="No jobs are in the queue to run. Please add a job before running."
|
| 449 |
-
)
|
| 450 |
-
except Exception as e:
|
| 451 |
-
show_error("Error in run_job() in New Genome.", e)
|
| 452 |
-
|
| 453 |
-
#even handler for when jobs finish execution
|
| 454 |
-
def upon_process_finishing(self):
|
| 455 |
-
try:
|
| 456 |
-
row_index = self.indexes[0]
|
| 457 |
-
name = self.job_Table.item(row_index, 1).text()
|
| 458 |
-
item = QtWidgets.QTableWidgetItem(name)
|
| 459 |
-
item.setTextAlignment(QtCore.Qt.AlignHCenter)
|
| 460 |
-
self.job_Table.setItem(row_index, 2, item)
|
| 461 |
-
self.job_Table.setItem(row_index, 1, QtWidgets.QTableWidgetItem(""))
|
| 462 |
-
self.indexes.pop(0)
|
| 463 |
-
if len(self.indexes) != 0:
|
| 464 |
-
self.run_job()
|
| 465 |
-
else:
|
| 466 |
-
#prompt user if they want to analyze their new files
|
| 467 |
-
center_ui(self.goToPrompt)
|
| 468 |
-
self.goToPrompt.show()
|
| 469 |
-
self.goToPrompt.activateWindow()
|
| 470 |
-
except Exception as e:
|
| 471 |
-
show_error("Error in upon_process_finishing() in New Genome.", e)
|
| 472 |
-
|
| 473 |
-
#clear the job table
|
| 474 |
-
def clear_all(self):
|
| 475 |
-
try:
|
| 476 |
-
self.process.kill()
|
| 477 |
-
self.fin_index = 0
|
| 478 |
-
self.job_Table.clearContents()
|
| 479 |
-
self.job_Table.setRowCount(0)
|
| 480 |
-
self.JobsQueue = []
|
| 481 |
-
self.check_strings = []
|
| 482 |
-
self.output_browser.clear()
|
| 483 |
-
self.output_browser.setText("Waiting for program initiation...")
|
| 484 |
-
self.orgName.clear()
|
| 485 |
-
self.strainName.clear()
|
| 486 |
-
self.orgCode.clear()
|
| 487 |
-
self.selectedFile.clear()
|
| 488 |
-
self.selectedFile.setPlaceholderText("Selected FASTA/FNA File")
|
| 489 |
-
self.progressBar.setValue(0)
|
| 490 |
-
self.first = False
|
| 491 |
-
except Exception as e:
|
| 492 |
-
show_error("Error in clear_all() in New Genome.", e)
|
| 493 |
-
|
| 494 |
-
#reset the whole form
|
| 495 |
-
def reset(self):
|
| 496 |
-
try:
|
| 497 |
-
self.orgName.clear()
|
| 498 |
-
self.strainName.clear()
|
| 499 |
-
self.orgCode.clear()
|
| 500 |
-
self.selectedFile.clear()
|
| 501 |
-
self.selectedFile.setPlaceholderText("Selected FASTA/FNA File")
|
| 502 |
-
self.output_browser.clear()
|
| 503 |
-
self.output_browser.setText("Waiting for program initiation...")
|
| 504 |
-
self.file = ""
|
| 505 |
-
except Exception as e:
|
| 506 |
-
show_error("Error in reset() in New Genome.", e)
|
| 507 |
-
|
| 508 |
-
#event handler for user wanting to close the window
|
| 509 |
-
def closeEvent(self, event):
|
| 510 |
-
try:
|
| 511 |
-
# make sure that there are cspr files in the DB
|
| 512 |
-
file_names = os.listdir(GlobalSettings.CSPR_DB)
|
| 513 |
-
noCSPRFiles = True
|
| 514 |
-
for file in file_names:
|
| 515 |
-
if 'cspr' in file:
|
| 516 |
-
noCSPRFiles = False
|
| 517 |
-
break
|
| 518 |
-
if noCSPRFiles == True:
|
| 519 |
-
if self.exit == False:
|
| 520 |
-
msgBox = QtWidgets.QMessageBox()
|
| 521 |
-
msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
|
| 522 |
-
msgBox.setIcon(QtWidgets.QMessageBox.Icon.Question)
|
| 523 |
-
msgBox.setWindowTitle("No CSPR file generated")
|
| 524 |
-
msgBox.setText("No CSPR file has been generated, thus the main program cannot run. Please create a CSPR file."
|
| 525 |
-
"Alternatively, you could quit the program. Would you like to quit?")
|
| 526 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Yes)
|
| 527 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.No)
|
| 528 |
-
msgBox.exec()
|
| 529 |
-
|
| 530 |
-
if (msgBox.result() == QtWidgets.QMessageBox.No):
|
| 531 |
-
event.ignore()
|
| 532 |
-
else:
|
| 533 |
-
event.accept()
|
| 534 |
-
else:
|
| 535 |
-
self.exit = False
|
| 536 |
-
event.accept()
|
| 537 |
-
else:
|
| 538 |
-
self.process.kill()
|
| 539 |
-
self.clear_all()
|
| 540 |
-
self.goToPrompt.hide()
|
| 541 |
-
GlobalSettings.mainWindow.fill_annotation_dropdown()
|
| 542 |
-
if GlobalSettings.mainWindow.orgChoice.currentText() != '':
|
| 543 |
-
GlobalSettings.mainWindow.orgChoice.currentIndexChanged.disconnect()
|
| 544 |
-
GlobalSettings.mainWindow.orgChoice.clear()
|
| 545 |
-
GlobalSettings.mainWindow.endoChoice.clear()
|
| 546 |
-
GlobalSettings.mainWindow.getData()
|
| 547 |
-
GlobalSettings.MTWin.launch()
|
| 548 |
-
GlobalSettings.pop_Analysis.launch()
|
| 549 |
-
|
| 550 |
-
if GlobalSettings.mainWindow.first_show == True:
|
| 551 |
-
GlobalSettings.mainWindow.first_show = False
|
| 552 |
-
GlobalSettings.mainWindow.centerUI()
|
| 553 |
-
GlobalSettings.mainWindow.show()
|
| 554 |
-
event.accept()
|
| 555 |
-
except Exception as e:
|
| 556 |
-
show_error("Error in closeEvent() in New Genome.", e)
|
| 557 |
-
|
| 558 |
-
#event handler for user wanting to go to Main once jobs complete
|
| 559 |
-
def continue_to_main(self):
|
| 560 |
-
try:
|
| 561 |
-
# make sure that there are cspr files in the DB
|
| 562 |
-
file_names = os.listdir(GlobalSettings.CSPR_DB)
|
| 563 |
-
noCSPRFiles = True
|
| 564 |
-
for file in file_names:
|
| 565 |
-
if 'cspr' in file:
|
| 566 |
-
noCSPRFiles = False
|
| 567 |
-
break
|
| 568 |
-
if noCSPRFiles == True:
|
| 569 |
-
|
| 570 |
-
msgBox = QtWidgets.QMessageBox()
|
| 571 |
-
msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
|
| 572 |
-
msgBox.setIcon(QtWidgets.QMessageBox.Icon.Question)
|
| 573 |
-
msgBox.setWindowTitle("No CSPR file generated")
|
| 574 |
-
msgBox.setText(
|
| 575 |
-
"No CSPR file has been generated, thus the main program cannot run. Please create a CSPR file."
|
| 576 |
-
"Alternatively, you could quit the program. Would you like to quit?")
|
| 577 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Yes)
|
| 578 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.No)
|
| 579 |
-
msgBox.exec()
|
| 580 |
-
|
| 581 |
-
if (msgBox.result() == QtWidgets.QMessageBox.Yes):
|
| 582 |
-
self.exit = True
|
| 583 |
-
self.close()
|
| 584 |
-
else:
|
| 585 |
-
self.process.kill()
|
| 586 |
-
self.clear_all()
|
| 587 |
-
self.goToPrompt.hide()
|
| 588 |
-
GlobalSettings.mainWindow.fill_annotation_dropdown()
|
| 589 |
-
if GlobalSettings.mainWindow.orgChoice.currentText() != '':
|
| 590 |
-
GlobalSettings.mainWindow.orgChoice.currentIndexChanged.disconnect()
|
| 591 |
-
GlobalSettings.mainWindow.orgChoice.clear()
|
| 592 |
-
GlobalSettings.mainWindow.endoChoice.clear()
|
| 593 |
-
GlobalSettings.mainWindow.getData()
|
| 594 |
-
GlobalSettings.MTWin.launch()
|
| 595 |
-
GlobalSettings.pop_Analysis.launch()
|
| 596 |
-
|
| 597 |
-
# center main on current screen
|
| 598 |
-
if GlobalSettings.mainWindow.first_show == True:
|
| 599 |
-
GlobalSettings.mainWindow.first_show = False
|
| 600 |
-
center_ui(GlobalSettings.mainWindow)
|
| 601 |
-
GlobalSettings.mainWindow.show()
|
| 602 |
-
self.hide()
|
| 603 |
-
except Exception as e:
|
| 604 |
-
show_error("Error in continue_to_main() in New Genome.", e)
|
| 605 |
-
|
| 606 |
-
#event handler for user wanting to go to multi-targeting once jobs complete
|
| 607 |
-
def continue_to_MT(self):
|
| 608 |
-
try:
|
| 609 |
-
# make sure that there are cspr files in the DB
|
| 610 |
-
file_names = os.listdir(GlobalSettings.CSPR_DB)
|
| 611 |
-
noCSPRFiles = True
|
| 612 |
-
for file in file_names:
|
| 613 |
-
if 'cspr' in file:
|
| 614 |
-
noCSPRFiles = False
|
| 615 |
-
break
|
| 616 |
-
if noCSPRFiles == True:
|
| 617 |
-
|
| 618 |
-
msgBox = QtWidgets.QMessageBox()
|
| 619 |
-
msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
|
| 620 |
-
msgBox.setIcon(QtWidgets.QMessageBox.Icon.Question)
|
| 621 |
-
msgBox.setWindowTitle("No CSPR file generated")
|
| 622 |
-
msgBox.setText(
|
| 623 |
-
"No CSPR file has been generated, thus the main program cannot run. Please create a CSPR file."
|
| 624 |
-
"Alternatively, you could quit the program. Would you like to quit?")
|
| 625 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Yes)
|
| 626 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.No)
|
| 627 |
-
msgBox.exec()
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
if (msgBox.result() == QtWidgets.QMessageBox.Yes):
|
| 632 |
-
self.exit = True
|
| 633 |
-
self.close()
|
| 634 |
-
|
| 635 |
-
else:
|
| 636 |
-
self.process.kill()
|
| 637 |
-
self.clear_all()
|
| 638 |
-
self.goToPrompt.hide()
|
| 639 |
-
GlobalSettings.mainWindow.fill_annotation_dropdown()
|
| 640 |
-
if GlobalSettings.mainWindow.orgChoice.currentText() != '':
|
| 641 |
-
GlobalSettings.mainWindow.orgChoice.currentIndexChanged.disconnect()
|
| 642 |
-
GlobalSettings.mainWindow.orgChoice.clear()
|
| 643 |
-
GlobalSettings.mainWindow.endoChoice.clear()
|
| 644 |
-
GlobalSettings.mainWindow.getData()
|
| 645 |
-
GlobalSettings.MTWin.launch()
|
| 646 |
-
GlobalSettings.pop_Analysis.launch()
|
| 647 |
-
|
| 648 |
-
# center multi-targeting on current screen
|
| 649 |
-
if GlobalSettings.MTWin.first_show == True:
|
| 650 |
-
GlobalSettings.MTWin.first_show = False
|
| 651 |
-
GlobalSettings.MTWin.centerUI()
|
| 652 |
-
|
| 653 |
-
GlobalSettings.MTWin.show()
|
| 654 |
-
self.hide()
|
| 655 |
-
except Exception as e:
|
| 656 |
-
show_error("Error in continue_to_MT() in New Genome.", e)
|
| 657 |
-
|
| 658 |
-
#event handler for user wanting to go to population analysis once jobs complete
|
| 659 |
-
def continue_to_pop(self):
|
| 660 |
-
try:
|
| 661 |
-
# make sure that there are cspr files in the DB
|
| 662 |
-
file_names = os.listdir(GlobalSettings.CSPR_DB)
|
| 663 |
-
noCSPRFiles = True
|
| 664 |
-
for file in file_names:
|
| 665 |
-
if 'cspr' in file:
|
| 666 |
-
noCSPRFiles = False
|
| 667 |
-
break
|
| 668 |
-
if noCSPRFiles == True:
|
| 669 |
-
|
| 670 |
-
msgBox = QtWidgets.QMessageBox()
|
| 671 |
-
msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
|
| 672 |
-
msgBox.setIcon(QtWidgets.QMessageBox.Icon.Question)
|
| 673 |
-
msgBox.setWindowTitle("No CSPR file generated")
|
| 674 |
-
msgBox.setText(
|
| 675 |
-
"No CSPR file has been generated, thus the main program cannot run. Please create a CSPR file."
|
| 676 |
-
"Alternatively, you could quit the program. Would you like to quit?")
|
| 677 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Yes)
|
| 678 |
-
msgBox.addButton(QtWidgets.QMessageBox.StandardButton.No)
|
| 679 |
-
msgBox.exec()
|
| 680 |
-
|
| 681 |
-
if (msgBox.result() == QtWidgets.QMessageBox.Yes):
|
| 682 |
-
self.exit = True
|
| 683 |
-
self.close()
|
| 684 |
-
|
| 685 |
-
else:
|
| 686 |
-
self.process.kill()
|
| 687 |
-
self.clear_all()
|
| 688 |
-
self.goToPrompt.hide()
|
| 689 |
-
GlobalSettings.mainWindow.fill_annotation_dropdown()
|
| 690 |
-
if GlobalSettings.mainWindow.orgChoice.currentText() != '':
|
| 691 |
-
GlobalSettings.mainWindow.orgChoice.currentIndexChanged.disconnect()
|
| 692 |
-
GlobalSettings.mainWindow.orgChoice.clear()
|
| 693 |
-
GlobalSettings.mainWindow.endoChoice.clear()
|
| 694 |
-
GlobalSettings.mainWindow.getData()
|
| 695 |
-
GlobalSettings.MTWin.launch()
|
| 696 |
-
GlobalSettings.pop_Analysis.launch()
|
| 697 |
-
|
| 698 |
-
if GlobalSettings.pop_Analysis.first_show == True:
|
| 699 |
-
GlobalSettings.pop_Analysis.first_show = False
|
| 700 |
-
GlobalSettings.pop_Analysis.centerUI()
|
| 701 |
-
|
| 702 |
-
GlobalSettings.pop_Analysis.show()
|
| 703 |
-
self.hide()
|
| 704 |
-
except Exception as e:
|
| 705 |
-
show_error("Error in continue_to_pop() in New Genome.", e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -5,7 +5,7 @@ from matplotlib.figure import Figure
|
|
| 5 |
import mplcursors
|
| 6 |
import numpy as np
|
| 7 |
import matplotlib.patches as patches
|
| 8 |
-
from utils.ui import show_error
|
| 9 |
|
| 10 |
class PopulationAnalysisWindowView(QtWidgets.QMainWindow):
|
| 11 |
def __init__(self, global_settings):
|
|
|
|
| 5 |
import mplcursors
|
| 6 |
import numpy as np
|
| 7 |
import matplotlib.patches as patches
|
| 8 |
+
from utils.ui import show_error
|
| 9 |
|
| 10 |
class PopulationAnalysisWindowView(QtWidgets.QMainWindow):
|
| 11 |
def __init__(self, global_settings):
|
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from PyQt6 import QtWidgets, uic
|
| 2 |
+
from PyQt6.QtCore import pyqtSignal
|
| 3 |
+
import traceback
|
| 4 |
+
|
| 5 |
+
class ScoringOptionsView(QtWidgets.QMainWindow):
|
| 6 |
+
# Define signals
|
| 7 |
+
fasta_selected = pyqtSignal(str) # Signal when FASTA file is selected
|
| 8 |
+
submit_clicked = pyqtSignal() # Signal when submit button is clicked
|
| 9 |
+
|
| 10 |
+
def __init__(self, global_settings):
|
| 11 |
+
super().__init__()
|
| 12 |
+
self.settings = global_settings
|
| 13 |
+
self.logger = self.settings.get_logger()
|
| 14 |
+
self._init_ui()
|
| 15 |
+
|
| 16 |
+
def _init_ui(self):
|
| 17 |
+
try:
|
| 18 |
+
uic.loadUi(self.settings.get_ui_dir_path() + '/scoring_options.ui', self)
|
| 19 |
+
|
| 20 |
+
# Get UI elements
|
| 21 |
+
self.push_button_browse = self.findChild(QtWidgets.QPushButton, 'pbtnBrowse')
|
| 22 |
+
self.push_button_submit = self.findChild(QtWidgets.QPushButton, 'pbtnSubmit')
|
| 23 |
+
self.line_edit_fasta = self.findChild(QtWidgets.QLineEdit, 'ledInputFASTA')
|
| 24 |
+
self.radio_button_azimuth = self.findChild(QtWidgets.QRadioButton, 'rbtnAzimuth')
|
| 25 |
+
|
| 26 |
+
# Connect signals
|
| 27 |
+
self.push_button_browse.clicked.connect(self._browse_fasta)
|
| 28 |
+
self.push_button_submit.clicked.connect(self.submit_clicked.emit)
|
| 29 |
+
|
| 30 |
+
# Set window title
|
| 31 |
+
self.setWindowTitle("Select Scoring Algorithm")
|
| 32 |
+
|
| 33 |
+
# Apply theme
|
| 34 |
+
self.apply_theme()
|
| 35 |
+
|
| 36 |
+
except Exception as e:
|
| 37 |
+
self.logger.error(f"Error initializing ScoringOptionsView: {str(e)}")
|
| 38 |
+
self.logger.error(f"Stack trace: {traceback.format_exc()}")
|
| 39 |
+
raise
|
| 40 |
+
|
| 41 |
+
def apply_theme(self):
|
| 42 |
+
"""Apply the current theme to the window"""
|
| 43 |
+
try:
|
| 44 |
+
current_theme = self.settings.get_theme()
|
| 45 |
+
themes = {
|
| 46 |
+
"dark": {
|
| 47 |
+
"bg_color": "#2b2b2b",
|
| 48 |
+
"fg_color": "#ffffff",
|
| 49 |
+
"button_bg_color": "#3a3a3a",
|
| 50 |
+
"button_border_color": "#5a5a5a",
|
| 51 |
+
"button_hover_bg_color": "#4a4a4a",
|
| 52 |
+
"input_bg_color": "#3a3a3a",
|
| 53 |
+
"input_border_color": "#5a5a5a",
|
| 54 |
+
"progress_bar_bg": "#3a3a3a",
|
| 55 |
+
"progress_bar_chunk": "#51b85e"
|
| 56 |
+
},
|
| 57 |
+
"light": {
|
| 58 |
+
"bg_color": "#f0f0f0",
|
| 59 |
+
"fg_color": "#000000",
|
| 60 |
+
"button_bg_color": "#e0e0e0",
|
| 61 |
+
"button_border_color": "#c0c0c0",
|
| 62 |
+
"button_hover_bg_color": "#d0d0d0",
|
| 63 |
+
"input_bg_color": "#ffffff",
|
| 64 |
+
"input_border_color": "#c0c0c0",
|
| 65 |
+
"progress_bar_bg": "#e0e0e0",
|
| 66 |
+
"progress_bar_chunk": "#51b85e"
|
| 67 |
+
}
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
theme = themes["dark"] if current_theme == "dark" else themes["light"]
|
| 71 |
+
|
| 72 |
+
# Set the stylesheet
|
| 73 |
+
self.setStyleSheet(f"""
|
| 74 |
+
QMainWindow, QWidget {{
|
| 75 |
+
background-color: {theme['bg_color']};
|
| 76 |
+
color: {theme['fg_color']};
|
| 77 |
+
}}
|
| 78 |
+
QPushButton {{
|
| 79 |
+
background-color: {theme['button_bg_color']};
|
| 80 |
+
border: 1px solid {theme['button_border_color']};
|
| 81 |
+
padding: 5px;
|
| 82 |
+
min-width: 80px;
|
| 83 |
+
}}
|
| 84 |
+
QPushButton:hover {{
|
| 85 |
+
background-color: {theme['button_hover_bg_color']};
|
| 86 |
+
}}
|
| 87 |
+
QLineEdit {{
|
| 88 |
+
background-color: {theme['input_bg_color']};
|
| 89 |
+
border: 1px solid {theme['input_border_color']};
|
| 90 |
+
padding: 5px;
|
| 91 |
+
}}
|
| 92 |
+
QRadioButton {{
|
| 93 |
+
color: {theme['fg_color']};
|
| 94 |
+
}}
|
| 95 |
+
QProgressBar {{
|
| 96 |
+
border: 1px solid {theme['button_border_color']};
|
| 97 |
+
background-color: {theme['progress_bar_bg']};
|
| 98 |
+
text-align: center;
|
| 99 |
+
}}
|
| 100 |
+
QProgressBar::chunk {{
|
| 101 |
+
background-color: {theme['progress_bar_chunk']};
|
| 102 |
+
}}
|
| 103 |
+
QGroupBox {{
|
| 104 |
+
border: 1px solid {theme['button_border_color']};
|
| 105 |
+
margin-top: 0.5em;
|
| 106 |
+
padding-top: 0.5em;
|
| 107 |
+
}}
|
| 108 |
+
QGroupBox::title {{
|
| 109 |
+
color: {theme['fg_color']};
|
| 110 |
+
subcontrol-origin: margin;
|
| 111 |
+
left: 10px;
|
| 112 |
+
padding: 0 3px 0 3px;
|
| 113 |
+
}}
|
| 114 |
+
""")
|
| 115 |
+
|
| 116 |
+
except Exception as e:
|
| 117 |
+
self.logger.error(f"Error applying theme: {str(e)}")
|
| 118 |
+
self.logger.error(f"Stack trace: {traceback.format_exc()}")
|
| 119 |
+
|
| 120 |
+
def _browse_fasta(self):
|
| 121 |
+
try:
|
| 122 |
+
# Get database directory path
|
| 123 |
+
db_path = self.settings.get_db_path()
|
| 124 |
+
|
| 125 |
+
file_dialog = QtWidgets.QFileDialog()
|
| 126 |
+
file_path, _ = QtWidgets.QFileDialog.getOpenFileName(
|
| 127 |
+
file_dialog,
|
| 128 |
+
"Choose FASTA File",
|
| 129 |
+
db_path, # Set initial directory to database path
|
| 130 |
+
"FASTA Files (*.fa *.fasta *.fna)"
|
| 131 |
+
)
|
| 132 |
+
|
| 133 |
+
if file_path:
|
| 134 |
+
self.line_edit_fasta.setText(file_path)
|
| 135 |
+
self.fasta_selected.emit(file_path)
|
| 136 |
+
self.logger.debug(f"Selected FASTA file: {file_path}")
|
| 137 |
+
|
| 138 |
+
except Exception as e:
|
| 139 |
+
self.logger.error(f"Error browsing FASTA file: {str(e)}")
|
| 140 |
+
QtWidgets.QMessageBox.critical(
|
| 141 |
+
self,
|
| 142 |
+
"Error",
|
| 143 |
+
f"Error selecting FASTA file: {str(e)}"
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
+
def get_selected_algorithm(self):
|
| 147 |
+
"""Get the currently selected scoring algorithm"""
|
| 148 |
+
if self.radio_button_azimuth.isChecked():
|
| 149 |
+
return "Azimuth 2.0"
|
| 150 |
+
return None
|
| 151 |
+
|
| 152 |
+
def get_fasta_path(self):
|
| 153 |
+
"""Get the selected FASTA file path"""
|
| 154 |
+
return self.line_edit_fasta.text()
|
| 155 |
+
|
| 156 |
+
def show_error(self, title, message):
|
| 157 |
+
"""Show error message box"""
|
| 158 |
+
QtWidgets.QMessageBox.critical(self, title, message)
|
| 159 |
+
|
| 160 |
+
def show_info(self, title, message):
|
| 161 |
+
"""Show info message box"""
|
| 162 |
+
QtWidgets.QMessageBox.information(self, title, message)
|
|
@@ -1,10 +1,16 @@
|
|
| 1 |
from typing import Optional
|
| 2 |
-
from PyQt6 import QtWidgets, uic
|
| 3 |
from PyQt6.QtWidgets import QTableWidgetItem, QAbstractItemView
|
| 4 |
from PyQt6.QtGui import QTextDocument
|
|
|
|
| 5 |
from utils.ui import show_error
|
|
|
|
|
|
|
| 6 |
|
| 7 |
class ViewTargetsView(QtWidgets.QMainWindow):
|
|
|
|
|
|
|
|
|
|
| 8 |
def __init__(self, global_settings):
|
| 9 |
super().__init__()
|
| 10 |
self.settings = global_settings
|
|
@@ -26,6 +32,9 @@ class ViewTargetsView(QtWidgets.QMainWindow):
|
|
| 26 |
|
| 27 |
self.push_button_export_grna = self._find_widget('pbtnExportgRNA', QtWidgets.QPushButton)
|
| 28 |
|
|
|
|
|
|
|
|
|
|
| 29 |
def _init_grpGuideViewer(self):
|
| 30 |
self.combo_box_gene = self._find_widget('cmbGene', QtWidgets.QComboBox)
|
| 31 |
self.combo_box_endonuclease = self._find_widget('cmbEndonuclease', QtWidgets.QComboBox)
|
|
@@ -61,63 +70,184 @@ class ViewTargetsView(QtWidgets.QMainWindow):
|
|
| 61 |
return widget
|
| 62 |
|
| 63 |
def display_targets_in_table(self, targets):
|
| 64 |
-
"""
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
#
|
| 69 |
-
if
|
| 70 |
-
self.
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
else:
|
| 79 |
-
self.
|
| 80 |
-
self.
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
def get_selected_targets(self):
|
| 94 |
"""Get selected targets with all necessary data"""
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
}
|
| 108 |
-
|
| 109 |
-
for row in selected_rows:
|
| 110 |
-
target = {
|
| 111 |
-
'location': self.table_targets.item(row, columns['location']).text(),
|
| 112 |
-
'endonuclease': self.table_targets.item(row, columns['endonuclease']).text(),
|
| 113 |
-
'sequence': self.table_targets.item(row, columns['sequence']).text(), # Get sequence
|
| 114 |
-
'strand': self.table_targets.item(row, columns['strand']).text(),
|
| 115 |
-
'pam': self.table_targets.item(row, columns['pam']).text(),
|
| 116 |
-
'score': self.table_targets.item(row, columns['score']).text(),
|
| 117 |
-
'off_target': self.table_targets.item(row, columns['off_target']).text()
|
| 118 |
}
|
| 119 |
-
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
def get_row_data(self, row):
|
| 123 |
return {
|
|
@@ -134,10 +264,53 @@ class ViewTargetsView(QtWidgets.QMainWindow):
|
|
| 134 |
self.combo_box_endonuclease.addItems(endonucleases)
|
| 135 |
|
| 136 |
def set_combo_box_gene(self, genes):
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
def set_text_edit_gene_viewer(self, sequence):
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
def update_gene_info(self, info):
|
| 143 |
# Implement this method if you have a widget to display gene info
|
|
@@ -180,3 +353,53 @@ class ViewTargetsView(QtWidgets.QMainWindow):
|
|
| 180 |
def get_export_file_path(self):
|
| 181 |
# Implement this method to get the export file path from the user
|
| 182 |
return QtWidgets.QFileDialog.getSaveFileName(self, 'Save File')[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from typing import Optional
|
| 2 |
+
from PyQt6 import QtWidgets, uic, QtCore
|
| 3 |
from PyQt6.QtWidgets import QTableWidgetItem, QAbstractItemView
|
| 4 |
from PyQt6.QtGui import QTextDocument
|
| 5 |
+
from PyQt6.QtCore import Qt, pyqtSignal
|
| 6 |
from utils.ui import show_error
|
| 7 |
+
import time
|
| 8 |
+
import traceback
|
| 9 |
|
| 10 |
class ViewTargetsView(QtWidgets.QMainWindow):
|
| 11 |
+
# Define the signal
|
| 12 |
+
gene_selected = pyqtSignal(str) # Signal to emit when gene is selected
|
| 13 |
+
|
| 14 |
def __init__(self, global_settings):
|
| 15 |
super().__init__()
|
| 16 |
self.settings = global_settings
|
|
|
|
| 32 |
|
| 33 |
self.push_button_export_grna = self._find_widget('pbtnExportgRNA', QtWidgets.QPushButton)
|
| 34 |
|
| 35 |
+
# Connect gene selection change with direct signal
|
| 36 |
+
self.combo_box_gene.currentTextChanged.connect(self._on_gene_changed)
|
| 37 |
+
|
| 38 |
def _init_grpGuideViewer(self):
|
| 39 |
self.combo_box_gene = self._find_widget('cmbGene', QtWidgets.QComboBox)
|
| 40 |
self.combo_box_endonuclease = self._find_widget('cmbEndonuclease', QtWidgets.QComboBox)
|
|
|
|
| 70 |
return widget
|
| 71 |
|
| 72 |
def display_targets_in_table(self, targets):
    """Populate the targets table with the guides of the selected gene.

    The first list ever passed in is cached as ``self._complete_targets``.
    Each call filters that cached list by the locus tag taken from the gene
    combo box (the text before ``': '``), compared case-insensitively with
    each target's ``feature_id``.  The rows shown are stored in
    ``self._all_results``.  With an empty combo box, all cached targets are
    shown.

    Args:
        targets: Sequence of dicts.  Each is read for the keys ``location``,
            ``endonuclease``, ``sequence``, ``strand``, ``pam`` and ``score``,
            and optionally ``feature_id`` and ``azimuth_score``.

    Errors are logged and reported through ``show_error`` rather than raised.
    """
    try:
        start_time = time.time()

        # NOTE(review): only the first call stores ``targets``; later calls
        # re-filter the cached list and ignore their argument.  Confirm that
        # callers clear ``_complete_targets`` when a new search is run.
        if not hasattr(self, '_complete_targets'):
            self._complete_targets = targets

        selected_text = self.combo_box_gene.currentText()
        # Combo entries look like "locus_tag: gene_name"; keep the locus tag.
        selected_locus = selected_text.split(': ')[0] if ': ' in selected_text else selected_text

        if selected_locus:
            wanted_locus = selected_locus.lower()
            filtered_targets = [
                target for target in self._complete_targets
                if str(target.get('feature_id', '')).strip().lower() == wanted_locus
            ]
        else:
            filtered_targets = self._complete_targets
        self._all_results = filtered_targets
        total_rows = len(filtered_targets)

        table = self.table_targets

        # Freeze painting and sorting while the rows are rebuilt.
        self.setUpdatesEnabled(False)
        table.setUpdatesEnabled(False)
        table.setSortingEnabled(False)
        table.setVisible(False)

        try:
            table.clearContents()
            table.setRowCount(total_rows)

            # The Azimuth column only exists after add_scoring_column was used.
            headers = self.get_table_headers()
            azimuth_index = headers.index("Azimuth 2.0") if "Azimuth 2.0" in headers else None

            # Every cell is read-only, matching _handle_scroll_virtual.
            flags = Qt.ItemFlag.ItemIsEnabled | Qt.ItemFlag.ItemIsSelectable
            edit_role = QtCore.Qt.ItemDataRole.EditRole
            text_keys = ('location', 'endonuclease', 'sequence', 'strand', 'pam')

            for row, target in enumerate(filtered_targets):
                for col, key in enumerate(text_keys):
                    item = QTableWidgetItem(str(target[key]))
                    item.setFlags(flags)
                    table.setItem(row, col, item)

                # Stored as a float so the column sorts numerically.
                score_item = QTableWidgetItem()
                score_item.setData(edit_role, float(target['score']))
                score_item.setFlags(flags)
                table.setItem(row, 5, score_item)

                # Off-target placeholder.
                ot_item = QTableWidgetItem("--.--")
                ot_item.setFlags(flags)
                table.setItem(row, 6, ot_item)

                table.setCellWidget(row, 7, QtWidgets.QPushButton("Details"))

                if azimuth_index is not None and 'azimuth_score' in target:
                    azimuth_item = QTableWidgetItem()
                    azimuth_item.setData(edit_role, float(target['azimuth_score']))
                    azimuth_item.setFlags(flags)
                    table.setItem(row, azimuth_index, azimuth_item)

            column_widths = [100, 100, 200, 80, 80, 80, 80, 100]
            for col, width in enumerate(column_widths):
                table.setColumnWidth(col, width)

        finally:
            # Always unfreeze, even when filling the table failed.
            table.setVisible(True)
            table.setUpdatesEnabled(True)
            self.setUpdatesEnabled(True)
            table.setSortingEnabled(True)

        total_time = time.time() - start_time
        self.logger.debug(f"Display time: {total_time:.2f} seconds for {total_rows} rows")

    except Exception as e:
        self.logger.error(f"Error in display_targets_in_table: {str(e)}")
        show_error(self.settings, "Error displaying targets", str(e))
|
| 170 |
+
|
| 171 |
+
def _handle_scroll_virtual(self, value, total_rows, row_height, buffer_rows):
    """Fill in the table rows around the current scroll position.

    Args:
        value: Scroll position; divided by ``row_height`` to get the top row.
        total_rows: Upper bound for the row range that may be filled.
        row_height: Height of one row; values below 1 are treated as 1.
        buffer_rows: Extra rows to fill above and below the visible range.

    Rows that already have an item in column 0 are left untouched.  Errors
    are logged, not raised.
    """
    try:
        results = getattr(self, '_all_results', None)
        if not results:
            return

        table = self.table_targets
        row_height = max(1, row_height)  # guards the divisions below
        viewport_height = max(1, table.viewport().height())
        visible_rows = viewport_height // row_height

        top_row = value // row_height
        first = max(0, top_row - buffer_rows)
        last = min(total_rows, top_row + visible_rows + buffer_rows)

        read_only = Qt.ItemFlag.ItemIsEnabled | Qt.ItemFlag.ItemIsSelectable

        for row in range(first, last):
            if row >= len(results) or table.item(row, 0):
                continue

            target = results[row]
            cell_values = [
                target['location'], target['endonuclease'],
                target['sequence'], target['strand'], target['pam'],
                target['score'], "--.--"
            ]
            for col, cell_value in enumerate(cell_values):
                item = QTableWidgetItem(str(cell_value))
                item.setFlags(read_only)
                table.setItem(row, col, item)

            if not table.cellWidget(row, 7):
                table.setCellWidget(row, 7, QtWidgets.QPushButton("Details"))

    except Exception as e:
        self.logger.error(f"Error in _handle_scroll_virtual: {str(e)}")
|
| 208 |
|
| 209 |
def get_selected_targets(self):
    """Return the selected table rows as a list of dicts of cell text.

    Each dict has the keys ``location``, ``endonuclease``, ``sequence``,
    ``strand``, ``pam`` and ``score``, read from columns 0-5.  Rows with a
    missing cell in any of those columns are skipped with a warning.
    Returns an empty list if an error occurs.
    """
    try:
        table = self.table_targets
        field_columns = (
            ('location', 0),
            ('endonuclease', 1),
            ('sequence', 2),
            ('strand', 3),
            ('pam', 4),
            ('score', 5),
        )

        selected_targets = []
        for row in sorted({index.row() for index in table.selectedIndexes()}):
            cells = {name: table.item(row, col) for name, col in field_columns}

            if any(cell is None for cell in cells.values()):
                self.logger.warning(f"Skipping row {row} due to missing data")
                continue

            selected_targets.append({name: cell.text() for name, cell in cells.items()})

        if not selected_targets:
            self.logger.warning("No valid targets selected")

        return selected_targets

    except Exception as e:
        self.logger.error(f"Error getting selected targets: {str(e)}")
        self.logger.error(f"Stack trace: {traceback.format_exc()}")
        return []
|
| 251 |
|
| 252 |
def get_row_data(self, row):
|
| 253 |
return {
|
|
|
|
| 264 |
self.combo_box_endonuclease.addItems(endonucleases)
|
| 265 |
|
| 266 |
def set_combo_box_gene(self, genes):
    """Replace the entries of the gene combo box with ``genes``.

    Signals and repaints are suppressed while the items are swapped, so the
    selection of the first entry does not emit ``currentTextChanged``.  Both
    are restored in a ``finally`` block; previously any failure left the
    combo box permanently silent and frozen.

    Args:
        genes: List of strings to show; ``None`` or an empty list simply
            clears the combo box.

    Errors are logged, not raised.
    """
    try:
        start_time = time.time()

        combo = self.combo_box_gene
        combo.blockSignals(True)
        combo.setUpdatesEnabled(False)
        try:
            combo.clear()

            # ``genes`` may be None; len(None) used to abort here.
            gene_count = len(genes) if genes else 0
            self.logger.debug(f"Received {gene_count} genes")

            if genes:
                combo.insertItems(0, genes)

                # Select the first entry while signals are still blocked.
                if combo.count() > 0:
                    combo.setCurrentIndex(0)

                self.logger.debug(f"Added {gene_count} genes to combo box")
        finally:
            combo.setUpdatesEnabled(True)
            combo.blockSignals(False)

        total_time = time.time() - start_time
        self.logger.debug(f"Combo box update time: {total_time:.2f} seconds")

    except Exception as e:
        self.logger.error(f"Error setting genes in combo box: {str(e)}")
        self.logger.error(f"Stack trace: {traceback.format_exc()}")
|
| 302 |
|
| 303 |
def set_text_edit_gene_viewer(self, sequence):
    """Show ``sequence`` in the gene viewer, or clear it when falsy.

    Errors are logged, not raised.
    """
    try:
        if not sequence:
            self.text_edit_gene_viewer.clear()
            self.logger.debug("Cleared gene viewer - no sequence provided")
            return

        self.text_edit_gene_viewer.setText(sequence)
        self.logger.debug(f"Updated gene viewer with sequence of length: {len(sequence)}")
    except Exception as e:
        self.logger.error(f"Error setting gene viewer text: {str(e)}")
|
| 314 |
|
| 315 |
def update_gene_info(self, info):
|
| 316 |
# Implement this method if you have a widget to display gene info
|
|
|
|
| 353 |
def get_export_file_path(self):
    """Ask the user for an export location and return the chosen file path.

    Returns:
        The first element of the result of ``QFileDialog.getSaveFileName``.
    """
    # getSaveFileName returns a (path, selected_filter) pair; only the path is used.
    chosen_path, _selected_filter = QtWidgets.QFileDialog.getSaveFileName(self, 'Save File')
    return chosen_path
|
| 356 |
+
|
| 357 |
+
def _on_gene_changed(self, selected_text):
    """React to a new gene selection in the combo box.

    Scrolls the targets table back to the top, re-displays the cached
    targets (if any were stored) and emits ``gene_selected`` with the new
    combo box text.  Errors are logged, not raised.
    """
    try:
        self.logger.debug(f"Gene selection changed to: {selected_text}")

        # Start the new gene's list from the top.
        scroll_bar = self.table_targets.verticalScrollBar()
        scroll_bar.setValue(0)

        # Redisplay only when a target list has been cached before.
        not_cached = object()
        cached_targets = getattr(self, '_complete_targets', not_cached)
        if cached_targets is not not_cached:
            self.display_targets_in_table(cached_targets)

        # Emitted regardless of whether any targets were cached.
        self.gene_selected.emit(selected_text)

    except Exception as e:
        self.logger.error(f"Error in _on_gene_changed: {str(e)}")
        self.logger.error(f"Stack trace: {traceback.format_exc()}")
|
| 375 |
+
|
| 376 |
+
def get_table_headers(self):
    """Return the horizontal header texts of the targets table, in column order.

    A column without a header item contributes an empty string, so the list
    always has one entry per column.  ``horizontalHeaderItem`` returns None
    for such a column, which previously raised AttributeError.
    """
    table = self.table_targets
    headers = []
    for column in range(table.columnCount()):
        header_item = table.horizontalHeaderItem(column)
        headers.append(header_item.text() if header_item is not None else "")
    return headers
|
| 382 |
+
|
| 383 |
+
def add_scoring_column(self, algorithm_name, position=None):
    """Insert a column for an additional scoring algorithm.

    Args:
        algorithm_name: Text for the new column's header.
        position: Column index at which to insert; ``None`` appends the
            column after the current last one.

    Returns:
        The index of the new column.
    """
    table = self.table_targets
    if position is None:
        position = table.columnCount()

    # insertColumn() already moves the items and header items of every
    # column at or after ``position`` one step to the right.  The former
    # manual shift loop moved them a second time, overwriting the last
    # column and pushing the new header off its own column.
    table.insertColumn(position)
    table.setHorizontalHeaderItem(
        position,
        QtWidgets.QTableWidgetItem(algorithm_name)
    )

    return position
|
|
@@ -1,73 +0,0 @@
|
|
| 1 |
-
import models.GlobalSettings as GlobalSettings
|
| 2 |
-
import os
|
| 3 |
-
from PyQt5 import QtWidgets, Qt, uic
|
| 4 |
-
import traceback
|
| 5 |
-
import math
|
| 6 |
-
from utils.ui import show_error, scale_ui
|
| 7 |
-
|
| 8 |
-
logger = GlobalSettings.logger
|
| 9 |
-
|
| 10 |
-
###########################################################
|
| 11 |
-
# closingWindow: this class is a little window where the user can select which files they want to delete
|
| 12 |
-
# Once they hit 'submit' it will delete all of the files selected, and close the program.
|
| 13 |
-
# If no files are selected, the program closes and no files are deleted
|
| 14 |
-
# Inputs are taken from the user (selecting files to delete and hitting submit), as well as from GlobalSettings for the files in CSPR_DB
|
| 15 |
-
# Outputs: the selected files are deleted, and the program is closed
|
| 16 |
-
###########################################################
|
| 17 |
-
class closingWindow(QtWidgets.QMainWindow):
    """Window listing the CSPR_DB files so the user can pick some to delete."""

    def __init__(self):
        """Load the UI file, wire the submit button and configure the file table."""
        try:
            super(closingWindow, self).__init__()
            uic.loadUi(GlobalSettings.appdir + "ui/closing_window.ui", self)
            self.setWindowTitle("Delete Files")
            self.setWindowIcon(Qt.QIcon(GlobalSettings.appdir + "cas9image.ico"))

            # Button
            self.submit_button.clicked.connect(self.submit_and_close)

            # Table: one column, whole-row multi-selection, not editable
            table = self.files_table
            table.setColumnCount(1)
            table.setShowGrid(True)
            table.setHorizontalHeaderLabels("File Name;".split(";"))
            table.setSelectionBehavior(QtWidgets.QAbstractItemView.SelectRows)
            table.setEditTriggers(QtWidgets.QAbstractItemView.NoEditTriggers)
            table.setSelectionMode(QtWidgets.QAbstractItemView.MultiSelection)

            scale_ui(self, custom_scale_width=400, custom_scale_height=300)

        except Exception as e:
            show_error("Error initializing closingWindow class.", e)

    def submit_and_close(self):
        """Delete every selected file, then close the window."""
        try:
            table = self.files_table
            for row in range(table.rowCount()):
                entry = table.item(row, 0)

                # Only selected rows are removed; the rest are left alone.
                # NOTE(review): the table holds bare file names, so os.remove
                # resolves them against the current working directory - confirm.
                if entry.isSelected():
                    os.remove(entry.text())
            self.close()
        except Exception as e:
            show_error("Error in sumbit_and_close() in closing window.", e)

    def get_files(self):
        """Fill the table with the CSPR_DB file names, sorted case-insensitively."""
        try:
            files_names = sorted(os.listdir(GlobalSettings.CSPR_DB), key=str.lower)
            table = self.files_table
            table.setRowCount(len(files_names))

            for row, file in enumerate(files_names):
                table.setItem(row, 0, QtWidgets.QTableWidgetItem(file))
            table.resizeColumnsToContents()
        except Exception as e:
            show_error("Error in get_files() in closing window.", e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -1,259 +0,0 @@
|
|
| 1 |
-
import models.GlobalSettings as GlobalSettings
|
| 2 |
-
from utils.sequence_utils import get_table_headers
|
| 3 |
-
import os
|
| 4 |
-
from PyQt5 import QtWidgets, Qt, uic, QtCore, QtGui
|
| 5 |
-
import platform
|
| 6 |
-
import traceback
|
| 7 |
-
import math
|
| 8 |
-
from utils.ui import show_message, show_error, scale_ui, center_ui
|
| 9 |
-
|
| 10 |
-
logger = GlobalSettings.logger
|
| 11 |
-
|
| 12 |
-
# This class opens a window for the user to select where they want the CSV file exported to, and the name of the file
|
| 13 |
-
# It takes the highlighted data from the Results page, and creates a CSV file from that
|
| 14 |
-
class export_tool(QtWidgets.QMainWindow):
    """Window that writes the selected table items to a delimited text file.

    The caller passes the selected items and a window tag to ``launch``:
    ``"mt"`` and ``"pa"`` select the multitargeting and population-analysis
    header sets; any other value is treated as the View Targets table.
    """

    def __init__(self):
        """Load the UI file, wire the buttons and initialise the export state."""
        try:
            super(export_tool, self).__init__()
            uic.loadUi(GlobalSettings.appdir + 'ui/export_tool.ui', self)
            self.setWindowIcon(Qt.QIcon(GlobalSettings.appdir + "cas9image.ico"))

            self.browse_button.clicked.connect(self.browseForFolder)
            self.cancel_button.clicked.connect(self.cancel_function)
            self.export_button.clicked.connect(self.export_function)

            # Set up validators for input fields:
            reg_ex = QtCore.QRegExp("[^,]+")  # No commas
            input_validator = QtGui.QRegExpValidator(reg_ex, self)
            self.leading_seq.setValidator(input_validator)
            self.trailing_seq.setValidator(input_validator)

            # GroupBox styling
            groupbox_style = """
            QGroupBox:title{subcontrol-origin: margin;
                            left: 10px;
                            padding: 0 5px 0 5px;}
            QGroupBox#gRNA_Options{border: 2px solid rgb(111,181,110);
                            border-radius: 9px;
                            margin-top: 10px;
                            font: bold 14pt 'Arial';} """
            self.gRNA_Options.setStyleSheet(groupbox_style)

            self.location = self.fileLocation_line_edit.text()
            self.selected_table_items = []
            self.window = ""
            self.num_columns = []
            self.locus_tag = False
            self.gene_name = False

            self.setWindowTitle("Export to CSV")
            scale_ui(self, custom_scale_width=650, custom_scale_height=200)

        except Exception as e:
            show_error("Error initializing export_tool class.", e)

    def launch(self, select_items, window):
        """Show the export window for the given selection.

        Args:
            select_items: List of the table items selected in the calling window.
            window: Tag of the calling window (``"mt"``, ``"pa"`` or anything
                else for View Targets).
        """
        try:
            if platform.system() == "Windows":
                self.fileLocation_line_edit.setText(GlobalSettings.CSPR_DB + "\\")
            else:
                self.fileLocation_line_edit.setText(GlobalSettings.CSPR_DB + "/")
            self.selected_table_items = select_items
            self.window = window
            center_ui(self)
            self.show()
            self.activateWindow()
        except Exception as e:
            show_error("Error in launch() in export_tool.", e)

    def _write_rows(self, output_data, delim, seq_index, num_cols, extra_fields):
        """Write the selected items as rows of ``num_cols`` cells each.

        After the cell at position ``seq_index`` of a row, an extra
        "full sequence" cell (leader + cell text + trailer) is inserted.
        ``extra_fields`` is appended to the end of every row.
        """
        leader = self.leading_seq.text().strip()
        trailer = self.trailing_seq.text().strip()
        row = []
        col = 0
        for i, item in enumerate(self.selected_table_items):
            text = item.text()
            row.append(text)
            if (i + 1) % num_cols == 0:
                # Last cell of the row: finish the line and start a new one.
                row.extend(extra_fields)
                output_data.write(delim.join(row) + "\n")
                row = []
                col = 0
            else:
                if col == seq_index:
                    row.append(leader + text + trailer)  # 5' Leader + gRNA + 3' Trailer
                col += 1

    def export_function(self):
        """Write the header line and every selected row, then close the window.

        The output path is the folder field plus the file name.  When the
        name has no extension, one is chosen from the delimiter (``.csv``,
        ``.tsv`` or ``.txt``).
        """
        try:
            delim = self.delimBox.currentText()
            file_name = self.filename_line_edit.text()
            if file_name == "":
                file_name = "exported_gRNAs"
            self.location = self.fileLocation_line_edit.text()

            if delim == ",":
                default_ext = '.csv'
            elif delim == r"\t":
                default_ext = '.tsv'
            else:
                default_ext = '.txt'
            # The combo box shows the two characters "\t"; always turn them
            # into a real tab.  Previously this only happened when the file
            # name had no extension.
            if delim == r"\t":
                delim = "\t"

            if '.' in file_name:  # If user added the file extension...
                full_path = self.location + file_name
            else:
                full_path = self.location + file_name + default_ext

            # Start from a clean state so flags left over from an earlier
            # View Targets export cannot misroute an mt/pa export.
            self.locus_tag = False
            self.gene_name = False

            try:
                with open(full_path, 'w') as output_data:
                    extra_fields = []
                    if self.window in ("mt", "pa"):
                        if self.window == "mt":
                            headers = get_table_headers(GlobalSettings.MTWin.table)
                        else:
                            headers = get_table_headers(GlobalSettings.pop_Analysis.table2)
                        # Number of table columns, counted before the extra header is added
                        num_cols = len(headers)
                        headers.insert(headers.index("% Consensus"), "Full Sequence")
                        seq_header = "Consensus Sequence"
                    else:
                        headers = get_table_headers(GlobalSettings.mainWindow.Results.targetTable)
                        headers.remove("Details")  # The details column doesn't carry any "items"
                        num_cols = len(headers)
                        headers.insert(headers.index("Strand"), "Full Sequence")
                        seq_header = "Sequence"

                        if GlobalSettings.mainWindow.radioButton_Gene.isChecked():  # Feature search
                            gene_text = GlobalSettings.mainWindow.Results.comboBoxGene.currentText()
                            parts = gene_text.split(":")
                            if len(parts) > 1:  # "locus_tag: gene_name"
                                headers.extend(["Locus_Tag", "Gene_Name"])
                                self.locus_tag = True
                                self.gene_name = True
                                extra_fields = [str(parts[0].strip()), str(parts[-1].strip())]
                            else:  # No locus tag: only the gene name is exported
                                headers.append("Gene_Name")
                                self.gene_name = True
                                extra_fields = [str(gene_text.strip())]

                    output_data.write(delim.join(headers) + "\n")
                    seq_index = headers.index(seq_header)
                    self._write_rows(output_data, delim, seq_index, num_cols, extra_fields)

            except PermissionError as e:
                show_error("This file cannot be opened. Please make sure that the file is not opened elsewhere and try again.", e)
                return

            except Exception as e:
                show_error("Error in export_function() in export_tool.", e)
                return

            # Tell the user the export finished
            show_message(
                fontSize=12,
                icon=QtWidgets.QMessageBox.Icon.Information,
                title="Export Complete",
                message=f"Export to {full_path} was successful."
            )

            # close the window
            self.cancel_function()
        except Exception as e:
            show_error("Error in export_function() in export_tool.", e)

    def cancel_function(self):
        """Reset the folder and file-name fields to their defaults and hide the window."""
        try:
            if platform.system() == "Windows":
                self.fileLocation_line_edit.setText(GlobalSettings.CSPR_DB + "\\")
            else:
                self.fileLocation_line_edit.setText(GlobalSettings.CSPR_DB + "/")
            self.filename_line_edit.setText("")
            self.location = ""
            self.hide()
        except Exception as e:
            show_error("Error in cancel_function() in export_tool.", e)

    def browseForFolder(self):
        """Let the user pick the output folder; do nothing if none is chosen."""
        try:
            filed = QtWidgets.QFileDialog()
            mydir = QtWidgets.QFileDialog.getExistingDirectory(filed, "Open a Folder",
                                                               GlobalSettings.CSPR_DB, QtWidgets.QFileDialog.ShowDirsOnly)
            if not os.path.isdir(mydir):
                return

            if platform.system() == "Windows":
                self.fileLocation_line_edit.setText(mydir + "\\")
                self.location = mydir + "\\"
            else:
                self.fileLocation_line_edit.setText(mydir + "/")
                self.location = mydir + "/"
        except Exception as e:
            show_error("Error in browseForFolder() in export_tool.", e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -1,662 +0,0 @@
|
|
| 1 |
-
import models.GlobalSettings as GlobalSettings
|
| 2 |
-
import os
|
| 3 |
-
from PyQt5 import QtWidgets, Qt, uic, QtCore
|
| 4 |
-
from functools import partial
|
| 5 |
-
from models.CSPRparser import CSPRparser
|
| 6 |
-
import re
|
| 7 |
-
import platform
|
| 8 |
-
import traceback
|
| 9 |
-
import math
|
| 10 |
-
from utils.ui import show_message, show_error, scale_ui, center_ui
|
| 11 |
-
from views.annotation_functions import *
|
| 12 |
-
|
| 13 |
-
logger = GlobalSettings.logger
|
| 14 |
-
|
| 15 |
-
# this class is a window that allows the user to select the settings for Generate Library
|
| 16 |
-
# When the user clicks Generate Library, it goes ahead and gets the Annotation Data needed
|
| 17 |
-
# Then the user can select the settings they want, and then hit submit.
|
| 18 |
-
# It creates a txt file with the data
|
| 19 |
-
class genLibrary(QtWidgets.QMainWindow):
|
| 20 |
-
def __init__(self):
    """Load the Generate Library window, style it and reset its state."""
    try:
        super(genLibrary, self).__init__()
        uic.loadUi(GlobalSettings.appdir + 'ui/generate_library.ui', self)
        self.setWindowTitle('Generate Library')
        self.setWindowIcon(Qt.QIcon(GlobalSettings.appdir + 'cas9image.ico'))

        # One stylesheet template; the object name is swapped for each step box.
        groupbox_style = """
        QGroupBox:title{subcontrol-origin: margin;
        left: 10px;
        padding: 0 5px 0 5px;}
        QGroupBox#Step1{border: 2px solid rgb(111,181,110);
        border-radius: 9px;
        font: bold 14pt 'Arial';
        margin-top: 10px;}"""
        for step_name in ("Step1", "Step2", "Step3", "Step4"):
            step_box = getattr(self, step_name)
            step_box.setStyleSheet(groupbox_style.replace("Step1", step_name))

        # Button wiring and initial progress.
        self.cancel_button.clicked.connect(self.cancel_function)
        self.BrowseButton.clicked.connect(self.browse_function)
        self.submit_button.clicked.connect(self.submit_data)
        self.progressBar.setValue(0)

        # Per-launch working state (filled in by launch()).
        self.anno_data = dict()
        self.kegg_nonKegg = ''
        self.gen_lib_dict = dict()
        self.cspr_data = dict()
        self.Output = dict()
        self.off_tol = .05
        self.off_max_misMatch = 4
        self.off_target_running = False
        self.parser = CSPRparser("")

        # Number of guides per gene: 1..10.
        for count in range(1, 11):
            self.numGenescomboBox.addItem(str(count))

        # Minimum on-target score: 20..70.
        for score in range(20, 71):
            self.minON_comboBox.addItem(str(score))

        scale_ui(self, custom_scale_width=950, custom_scale_height=500)

    except Exception as e:
        show_error("Error initializing generate library class.", e)
|
| 67 |
-
|
| 68 |
-
# this function launches the window
|
| 69 |
-
# Parameters:
|
| 70 |
-
# annotation_data: a dictionary that has the data for the annotations searched for
|
| 71 |
-
# currently MainWindow's searches dict is passed into this
|
| 72 |
-
# org_file: the cspr_file that pertains to the organism that user is using at the time
|
| 73 |
-
# anno_type: whether the user is using KEGG or another type of annotation file
|
| 74 |
-
def launch(self, annotation_data, org_file, anno_type):
    """Populate the window for one organism and show it.

    Parameters:
        annotation_data: the annotation search results to build the library from
        org_file: path of the CSPR file for the organism currently in use
        anno_type: which kind of annotation file is in use (stored as-is)
    """
    try:
        self.cspr_file = org_file

        # Look for the extension in the file name only. Searching the whole
        # path (as before) truncated it at the first dot of any parent
        # directory, e.g. "C:/Users/j.doe/db/org.cspr".
        cspr_name = os.path.basename(org_file)
        parent_prefix = org_file[:len(org_file) - len(cspr_name)]
        first_dot = cspr_name.find('.')
        base_name = cspr_name if first_dot == -1 else cspr_name[:first_dot]

        self.db_file = parent_prefix + base_name + '_repeats.db'
        self.anno_data = annotation_data
        self.kegg_nonKegg = anno_type
        self.process = QtCore.QProcess()
        self.parser.fileName = org_file

        # setting the path and file name fields
        self.filename_input.setText(base_name + '_lib')
        self.output_path.setText(GlobalSettings.CSPR_DB + os.sep)

        # build the per-gene dictionary used by generate()
        self.build_dict_non_kegg()

        # get the gRNA data from the cspr file
        self.cspr_data = self.parser.gen_lib_parser(
            self.gen_lib_dict,
            GlobalSettings.mainWindow.endoChoice.currentText(),
        )
        self.get_endo_data()

        center_ui(self)
        self.show()
        self.activateWindow()
    except Exception as e:
        show_error("Error in launch() in generate library.", e)
|
| 110 |
-
|
| 111 |
-
def get_endo_data(self):
    """Read the ENDONUCLEASES section of the CASPERinfo file into self.endo_data.

    Each 10-field, ';'-separated record becomes
    ``endo_data[name] = [total_length, prime, "MATRIX:" + hsu]`` where
    total_length is the sum of fields 2, 3 and 4. Reading stops at the first
    line of the section that starts with '-'.
    """
    try:
        self.endo_data = {}
        # `with` closes the file on every path; iterating the file ends at
        # EOF instead of spinning forever when the section header is missing.
        with open(GlobalSettings.appdir + "CASPERinfo") as info_file:
            in_section = False
            for raw_line in info_file:
                if not in_section:
                    in_section = raw_line.startswith('ENDONUCLEASES')
                    continue

                line = raw_line.replace("\n", "")
                if not line:
                    # A blank line used to raise IndexError on line[0].
                    continue
                if line[0] == "-":
                    break

                line_tokened = line.split(";")
                if len(line_tokened) == 10:
                    endo = line_tokened[0]
                    five_length = line_tokened[2]
                    seed_length = line_tokened[3]
                    three_length = line_tokened[4]
                    prime = line_tokened[5]
                    hsu = line_tokened[9]
                    self.endo_data[endo] = [
                        int(five_length) + int(three_length) + int(seed_length),
                        prime,
                        "MATRIX:" + hsu,
                    ]
    except Exception as e:
        show_error("Error in get_endo_data() in generate library.", e)
|
| 137 |
-
|
| 138 |
-
# this is here in case the user clicks 'x' instead of cancel. Just calls the cancel function
|
| 139 |
-
def closeEvent(self, event):
    """Route the window's close button through cancel_function()."""
    try:
        # cancel_function() returns -2 when off-target analysis is running and
        # the user chose not to abort it; in that case the window stays open.
        if self.cancel_function() == -2:
            event.ignore()
            return
        event.accept()
    except Exception as e:
        show_error("Error in closeEvent() in generate library.", e)
|
| 150 |
-
|
| 151 |
-
# this function takes all of the cspr data and compresses it again for off-target usage
|
| 152 |
-
def compress_file_off(self):
    """Write every target in self.cspr_data to off_input.txt for off-target analysis.

    One line per target: ``location;sequence;pam;score;strand``. The file is
    created in GlobalSettings.CSPR_DB.
    """
    try:
        separator = "\\" if platform.system() == "Windows" else "/"
        file = GlobalSettings.CSPR_DB + separator + "off_input.txt"
        # `with` guarantees the handle is closed even if a write fails.
        with open(file, 'w') as f:
            for gene in self.cspr_data:
                for target in self.cspr_data[gene]:
                    loc = target[0]
                    seq = target[1]
                    pam = target[2]
                    score = target[3]
                    strand = target[4]
                    fields = (loc, seq, pam, score, strand)
                    f.write(';'.join(str(field) for field in fields) + '\n')
    except Exception as e:
        show_error("Error in compress_file_off() in generate library.", e)
|
| 171 |
-
|
| 172 |
-
# this function parses the temp_off file, which holds the off-target analysis results
|
| 173 |
-
# it also updates each target in the cspr_data dictionary to replace the endo with the target's results in off-target
|
| 174 |
-
def parse_off_file(self):
    """Attach off-target scores from temp_off.txt to every target in self.cspr_data.

    The results file holds ``sequence:score`` lines (an 'AVG OUTPUT' header
    and empty lines are skipped). Each target tuple is rebuilt as its first
    five fields plus the score looked up by sequence (field 1).
    """
    try:
        separator = "\\" if platform.system() == "Windows" else "/"
        file = GlobalSettings.CSPR_DB + separator + "temp_off.txt"
        # `with` closes the handle even when read() raises.
        with open(file, "r") as f:
            file_data = f.read().split('\n')

        # get the data from the file
        scoreDict = dict()
        for entry in file_data:
            if entry == 'AVG OUTPUT' or entry == '':
                continue
            buffer = entry.split(':')
            scoreDict[buffer[0]] = buffer[1]

        # update cspr_data
        for gene in self.cspr_data:
            targets = self.cspr_data[gene]
            for index, target in enumerate(targets):
                targets[index] = (
                    target[0],
                    target[1],
                    target[2],
                    target[3],
                    target[4],
                    scoreDict[target[1]],
                )
    except Exception as e:
        show_error("Error in parse_off_file() in generate library.", e)
|
| 200 |
-
|
| 201 |
-
# this function runs the off_target command
|
| 202 |
-
# NOTE: some changes may be needed to get it to work with other OS besides windows
|
| 203 |
-
def get_offTarget_data(self, num_targets, minScore, spaceValue, output_file, fiveseq):
    """Run the external off-target executable, then generate the library.

    The five parameters are not used by the off-target run itself; they are
    forwarded unchanged to generate() once the process has finished.
    """
    try:
        self.perc = False
        self.bool_temp = False
        self.running = False

        system = platform.system()
        banner = "Running Off Target Algorithm for"

        # Use a fresh process for every run. Re-using the one from a previous
        # submit would stack a second set of handlers on it, and the stale
        # `finished` closure would call generate() with the old arguments.
        self.process = QtCore.QProcess()

        # when finished, parse the off file, and then generate the lib
        def finished():
            if self.off_target_running:
                self.progressBar.setValue(100)
                self.parse_off_file()
                did_work = self.generate(num_targets, minScore, spaceValue, output_file, fiveseq)
                self.off_target_running = False
                if did_work != -1:
                    self.cancel_function()
                    show_message(
                        fontSize=12,
                        icon=QtWidgets.QMessageBox.Icon.Information,
                        title="Library Generated!",
                        message="CASPER has finished generating your library!"
                    )
            # This also runs after the user cancelled (kill() emits finished),
            # when the scratch files may never have been written.
            for scratch_file in ('/off_input.txt', '/temp_off.txt'):
                try:
                    os.remove(GlobalSettings.CSPR_DB + scratch_file)
                except FileNotFoundError:
                    pass

        # as off-targeting outputs things, update the off-target progress bar
        def progUpdate(p):
            # str() of the byte array yields "b'...'": drop the b' prefix and
            # the closing quote. Line breaks are still escaped at this point,
            # hence the raw-string separators.
            line = str(self.process.readAllStandardOutput())
            line = line[2:]
            line = line[:len(line) - 1]
            separator = r'\r\n' if system == 'Windows' else r'\n'
            for lines in filter(None, line.split(separator)):
                is_banner = lines.find(banner) != -1
                if is_banner and not self.perc:
                    self.perc = True
                if self.perc and not self.bool_temp and not is_banner:
                    lines = lines[32:]
                    lines = lines.replace("%", "")
                    try:
                        num = float(lines)
                    except ValueError:
                        # Not a percentage line; an uncaught error here would
                        # escape from a Qt slot.
                        continue
                    if num <= 99.5:
                        # setValue() only accepts an int.
                        self.progressBar.setValue(int(num))
                    else:
                        self.bool_temp = True

        if system == 'Windows':
            app_path = GlobalSettings.appdir
            exe_path = app_path + 'OffTargetFolder\\OT_Win.exe'
            path_sep = '\\'
        else:
            app_path = GlobalSettings.appdir.replace('\\', '/')
            if system == 'Linux':
                exe_path = app_path + r'OffTargetFolder/OT_Lin'
            else:
                exe_path = app_path + r'OffTargetFolder/OT_Mac'
            path_sep = '/'
        output_path = '"' + GlobalSettings.CSPR_DB + path_sep + 'temp_off.txt" '
        data_path = '"' + GlobalSettings.CSPR_DB + path_sep + 'off_input.txt" '

        endo_name = GlobalSettings.mainWindow.endoChoice.currentText()
        exe_path = '"' + exe_path + '" '
        cspr_path = '"' + self.cspr_file + '" '
        db_path = '"' + self.db_file + '" '
        CASPER_info_path = '"' + app_path + 'CASPERinfo' + '" '
        num_of_mismathes = self.off_max_misMatch
        tolerance = self.off_tol
        endo = '"' + endo_name + '" '
        detailed_output = " False "
        avg_output = "True"
        hsu = ' "' + self.endo_data[endo_name][2] + '"'

        # set the off_target_running to true, to keep the user from closing the window while it is running
        self.off_target_running = True

        # create command string
        cmd = exe_path + data_path + endo + cspr_path + db_path + output_path + CASPER_info_path + str(
            num_of_mismathes) + ' ' + str(tolerance) + detailed_output + avg_output + hsu

        if system == 'Windows':
            cmd = cmd.replace('/', '\\')
        self.process.readyReadStandardOutput.connect(partial(progUpdate, self.process))
        self.process.readyReadStandardError.connect(partial(progUpdate, self.process))
        self.progressBar.setValue(0)
        QtCore.QTimer.singleShot(100, partial(self.process.start, cmd))
        self.process.finished.connect(finished)
    except Exception as e:
        show_error("Error in get_offTarget_data() in generate library.", e)
|
| 305 |
-
|
| 306 |
-
# submit function
|
| 307 |
-
# this function takes all of the input from the window, and calls the generate function
|
| 308 |
-
# Still need to add the checks for 5' seq, and the percentage thing
|
| 309 |
-
def submit_data(self):
    """Validate the form and start library generation.

    Every check runs before any work is started. With off-target analysis
    selected, the targets are written out once and get_offTarget_data() takes
    over (it calls generate() when the external process finishes). Otherwise
    generate() is called directly. Does nothing while an off-target run is
    already in progress.
    """

    def report_error(message):
        show_message(
            fontSize=12,
            icon=QtWidgets.QMessageBox.Icon.Critical,
            title="Error",
            message=message
        )

    try:
        if self.off_target_running:
            return
        output_file = self.output_path.text() + self.filename_input.text()

        minScore = int(self.minON_comboBox.currentText())
        num_targets = int(self.numGenescomboBox.currentText())
        fiveseq = ''

        # Always write a .csv. Only the trailing extension is swapped;
        # str.replace() would also rewrite ".txt" inside directory names.
        if output_file.endswith('.txt'):
            output_file = output_file[:-len('.txt')] + '.csv'
        elif not output_file.endswith('.csv'):
            output_file = output_file + '.csv'

        # space between guides: empty means 15, anything above 200 is capped
        space_text = self.space_line_edit.text()
        if space_text == '':
            spaceValue = 15
        else:
            try:
                spaceValue = int(space_text)
            except ValueError:
                report_error("Please enter integers only for space between guides.")
                return
        if spaceValue > 200:
            spaceValue = 200
        elif spaceValue < 0:
            report_error("Please enter a space-value that is 0 or greater.")
            return

        # 5' specificity: enforce what the error message promises (A/T/G/C only)
        five_text = self.fiveprimeseq.text()
        if five_text != '':
            if set(five_text.upper()) <= set("ATGC"):
                fiveseq = five_text
            else:
                report_error("Please make sure only the letters A, T, G, or C are added into 5' End specificity box.")
                return

        # targeting range: digits only, end at most 100, start below end.
        # Checking the upper bound here avoids a full off-target run that
        # generate() would reject afterwards.
        start_text = self.start_target_range.text()
        end_text = self.end_target_range.text()
        if not start_text.isdigit() or not end_text.isdigit() or int(end_text) > 100:
            report_error("Error: Please make sure that the start and end target ranges are numbers only. Please make sure that start is 0 or greater, and end is 100 or less. ")
            return
        elif int(start_text) >= int(end_text):
            report_error("Please make sure that the start number is always less than the end number")
            return

        # if they check Off-Targeting
        if self.find_off_Checkbox.isChecked():
            # Parse instead of pattern-matching: text such as "a.b" used to
            # pass the old check and then blow up in float().
            try:
                max_off_score = float(self.maxOFF_comboBox.text())
            except ValueError:
                report_error("Please enter only numbers for Maximum Off-Target Score. It cannot be left blank")
                return
            # make sure it between 0 and .5
            if not 0.0 < max_off_score <= .5:
                report_error("Please enter a max off-target score between 0 and 0.5!")
                return
            # compress the data (once), and then run off-targeting
            self.compress_file_off()
            self.get_offTarget_data(num_targets, minScore, spaceValue, output_file, fiveseq)
        else:
            # actually call the generate function
            did_work = self.generate(num_targets, minScore, spaceValue, output_file, fiveseq)

            if did_work != -1:
                self.cancel_function()
                show_message(
                    fontSize=12,
                    icon=QtWidgets.QMessageBox.Icon.Information,
                    title="Library Generated!",
                    message="CASPER has finished generating your library!"
                )
    except Exception as e:
        show_error("Error in submit_data() in generate library.", e)
|
| 422 |
-
|
| 423 |
-
# clears everything and hides the window
|
| 424 |
-
def cancel_function(self):
    """Reset every field and hide the window.

    While off-target analysis is running the user is asked to confirm first;
    answering "No" leaves everything untouched and returns -2. Confirming
    kills the off-target process before the reset.
    """
    try:
        if self.off_target_running:
            confirm_box = QtWidgets.QMessageBox()
            confirm_box.setStyleSheet("font: " + str(12) + "pt 'Arial'")
            confirm_box.setIcon(QtWidgets.QMessageBox.Icon.Question)
            confirm_box.setWindowTitle("Off-Targeting is running")
            confirm_box.setText(
                "Off-Targetting is running. Closing this window will cancel that process, and return to the main window. .\n Do you wish to continue?")
            confirm_box.addButton(QtWidgets.QMessageBox.StandardButton.Yes)
            confirm_box.addButton(QtWidgets.QMessageBox.StandardButton.No)
            confirm_box.exec()

            if confirm_box.result() == QtWidgets.QMessageBox.No:
                return -2

            self.off_target_running = False
            self.process.kill()

        # forget the current organism and annotation selection
        self.cspr_file = ''
        self.anno_data = list()

        self.filename_input.setText('')
        self.output_path.setText('')

        # drop all computed data
        for working_dict in (self.gen_lib_dict, self.cspr_data, self.Output):
            working_dict.clear()

        # restore the form defaults
        self.start_target_range.setText('0')
        self.end_target_range.setText('100')
        self.space_line_edit.setText('15')
        self.find_off_Checkbox.setChecked(False)
        self.modifyParamscheckBox.setChecked(False)
        self.maxOFF_comboBox.setText('')
        self.fiveprimeseq.setText('')
        self.off_target_running = False
        self.progressBar.setValue(0)

        self.hide()
    except Exception as e:
        show_error("Error in cancel_function() in generate library.", e)
|
| 466 |
-
|
| 467 |
-
# allows the user to browse for a folder
|
| 468 |
-
# stores their selection in the output_path line edit
|
| 469 |
-
def browse_function(self):
    """Let the user pick an output folder and store it in the output_path field.

    Does nothing while off-target analysis is running or when the dialog is
    dismissed without choosing an existing directory.
    """
    try:
        if self.off_target_running:
            return
        # get the folder
        filed = QtWidgets.QFileDialog()
        mydir = QtWidgets.QFileDialog.getExistingDirectory(filed, "Open a Folder",
                                                           GlobalSettings.CSPR_DB, QtWidgets.QFileDialog.ShowDirsOnly)
        if not os.path.isdir(mydir):
            return

        # make sure to append the path separator to the folder path
        # (the platform name was misspelled "Windwos", so the Windows branch
        # could never be taken)
        if platform.system() == "Windows":
            self.output_path.setText(mydir + "\\")
        else:
            self.output_path.setText(mydir + "/")
    except Exception as e:
        show_error("Error in browse_function() in generate library.", e)
|
| 487 |
-
|
| 488 |
-
# this function builds the dictionary that is used in the generate function
|
| 489 |
-
# this is the version that builds it from data from feature_table, gbff, or gff
|
| 490 |
-
# builds it exactly as Brian built it in the files given
|
| 491 |
-
def build_dict_non_kegg(self):
    """Fill self.gen_lib_dict from the annotation search results.

    Each item of self.anno_data is read as (chromosome, feature, ...). The
    dictionary is keyed by the feature's name and each value is
    [chromosome, start, end, strand, description, name].
    """
    try:
        for entry in self.anno_data:
            chrom = entry[0]
            feature = entry[1]
            feature_name = get_name(feature)
            ### Order: chromosome number, gene start, gene end, dir of gene, gene description, gene name/locus tag
            self.gen_lib_dict[feature_name] = [
                chrom,
                int(feature.location.start),
                int(feature.location.end),
                get_strand(feature),
                get_description(feature),
                feature_name,
            ]
    except Exception as e:
        show_error("Error in build_dict_non_kegg() in generate library.", e)
|
| 503 |
-
|
| 504 |
-
# generate function taken from Brian's code
|
| 505 |
-
def generate(self, num_targets_per_gene, score_limit, space, output_file, fiveseq):
    """Select guides for every gene and write them to a CSV file.

    Parameters:
        num_targets_per_gene: how many guides to pick for each gene
        score_limit: guides with an on-target score below this are rejected
        space: minimum distance between two picked guides
        output_file: path of the CSV file to write
        fiveseq: required 5' prefix of the guide sequence ('' disables it)

    Returns:
        -1 when nothing usable was produced (invalid targeting range, the
        output file could not be opened, or an unexpected error); None on
        success. Callers only compare the result against -1.
    """

    def drop(targets, rejected, should_drop):
        # Walk the list back to front (the order the original in-place pops
        # used) so rejected guides are recorded in the same sequence.
        kept = []
        for target in reversed(targets):
            if should_drop(target):
                if rejected is not None:
                    rejected.append(target)
            else:
                kept.append(target)
        kept.reverse()
        return kept

    try:
        keep_rejected = self.modifyParamscheckBox.isChecked()
        use_off_target = self.find_off_Checkbox.isChecked()
        deletedDict = dict()

        # check and see if we need to search based on target_range
        startNum = float(self.start_target_range.text())
        endNum = float(self.end_target_range.text())
        checkStartandEndBool = False
        if startNum != 0.0 or endNum != 100.0:
            if startNum >= 0.0 and endNum <= 100.0:
                startNum = startNum / 100
                endNum = endNum / 100
                checkStartandEndBool = True
            else:
                show_message(
                    fontSize=12,
                    icon=QtWidgets.QMessageBox.Icon.Critical,
                    title="Invalid Targeting Range:",
                    message="Please select a targeting range between 0 and 100."
                )
                return -1

        poly_t = re.compile("T{5,10}")
        five_prefix = fiveseq.upper()
        maxScore = float(self.maxOFF_comboBox.text()) if use_off_target else None

        for gene in self.gen_lib_dict:
            gene_info = self.gen_lib_dict[gene]
            # Work on a copy: reversing and filtering self.cspr_data in place
            # corrupted the data for a retry after a failed attempt.
            target_list = list(self.cspr_data[gene])  # Gets the gRNAs for given gene

            # Reverse the target list if the gene is on negative strand:
            if gene_info[3] == "-":
                target_list.reverse()

            # store the rejected guides if the user selects 'modify search parameters'
            rejected = None
            if keep_rejected:
                rejected = deletedDict[gene] = list()

            # Filter out the guides with low scores and long strings of T's
            target_list = drop(
                target_list, rejected,
                lambda t: int(t[3]) < int(score_limit) or poly_t.search(t[1]) is not None,
            )

            # check for the fiveseq
            if fiveseq != '':
                target_list = drop(
                    target_list, rejected,
                    lambda t: not t[1].startswith(five_prefix),
                )

            # check the target range here
            if checkStartandEndBool:
                gene_start = int(gene_info[1])
                totalDistance = gene_info[2] - gene_info[1]
                # A zero-length feature has no meaningful relative position;
                # skip the range filter instead of dividing by zero.
                if totalDistance != 0:
                    target_list = drop(
                        target_list, rejected,
                        lambda t: not (startNum <= (abs(int(t[0])) - gene_start) / totalDistance <= endNum),
                    )

            # if the user selected off-targeting, check to see that the targets do not exceed the selected max score
            if use_off_target:
                target_list = drop(
                    target_list, rejected,
                    lambda t: maxScore < float(t[5]),
                )

            # Now generating the targets.
            # NOTE(review): this selection loop is reproduced from the original
            # statement for statement; its nesting was inferred from a
            # flattened source, so confirm against the original file.
            self.Output[gene] = list()
            i = 0
            vec_index = 0
            prev_target = (0, "xyz", 'abc', 1, "-")
            while i < num_targets_per_gene:
                # pick targets in order, honouring the spacing filter
                if len(target_list) == 0 or vec_index >= len(target_list):
                    break
                while abs(int(target_list[vec_index][0]) - int(prev_target[0])) < int(space):
                    if target_list[vec_index][3] > prev_target[3] and prev_target != (0, "xyz", "abc", 1, "-"):
                        self.Output[gene].remove(prev_target)
                        self.Output[gene].append(target_list[vec_index])
                        prev_target = target_list[vec_index]
                    vec_index += 1
                    # check and see if there will be a indexing error
                    if vec_index >= len(target_list) - 1:
                        vec_index = vec_index - 1
                        break
                # Add the new target to the output and add another to i
                self.Output[gene].append(target_list[vec_index])
                prev_target = target_list[vec_index]
                i += 1
                vec_index += 1

        # if the user selects modify search parameters, top up every gene that
        # has too few targets from its rejected guides, marking them with '*'
        if keep_rejected:
            for gene in self.Output:
                chosen = self.Output[gene]
                for spare in deletedDict.get(gene, ()):
                    if len(chosen) >= num_targets_per_gene:
                        break
                    chosen.append((spare[0], spare[1], spare[2], spare[3], spare[4] + '*', spare[5]))

        # Now output to the file
        try:
            with open(output_file, 'w') as f:
                if use_off_target:
                    f.write('Gene Name,Sequence,On-Target Score,Off-Target Score,Location,PAM,Strand\n')
                else:
                    f.write('Gene Name,Sequence,On-Target Score,Location,PAM,Strand\n')

                for gene in self.Output:
                    gene_name = self.gen_lib_dict[gene][-1]
                    for number, target in enumerate(self.Output[gene], start=1):
                        # targets that did not match the user's parameters
                        # (strand marked with '*') get a "**" prefix
                        if '*' in target[4]:
                            tag_id = "**" + gene_name + "-" + str(number)
                        else:
                            tag_id = gene_name + "-" + str(number)

                        tag_id = tag_id.replace(',', '')

                        if use_off_target:
                            f.write(tag_id + ',' + target[1] + ',' + str(target[3]) + ',' + str(target[5]) + ',' + str(abs(int(target[0]))) + ',' + target[2] + ',' + target[4][0] + '\n')
                        else:
                            f.write(tag_id + ',' + target[1] + ',' + str(target[3]) + ',' + str(abs(int(target[0]))) + ',' + target[2] + ',' + target[4][0] + '\n')
        except PermissionError:
            show_message(
                fontSize=12,
                icon=QtWidgets.QMessageBox.Icon.Critical,
                title="File Cannot Open",
                message="This file cannot be opened. Please make sure that the file is not opened elsewhere and try again."
            )
            return -1
    except Exception as e:
        show_error("Error in generate() in generate library.", e)
        # Report the failure; returning None made callers announce success.
        return -1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|